1. Packages

# Load required packages
library(Seurat)
library(scDblFinder)
library(SingleR)
library(celldex)
library(SingleCellExperiment)
library(ggplot2)
library(clusterProfiler)
library(org.Mm.eg.db)
library(stringr)
library(dplyr)
library(patchwork)
library(cacoa)
library(ggrepel)
library(RColorBrewer)
library(ggalluvial)
library(fgsea)
library(msigdbr)
library(reshape2)
library(compositions)
cat("All packages loaded successfully!\n")
## All packages loaded successfully!

2. Global Parameters

cat("\n=== SETTING GLOBAL ANALYSIS PARAMETERS ===\n\n")
## 
## === SETTING GLOBAL ANALYSIS PARAMETERS ===
# =============================================================================
# GLOBAL ANALYSIS PARAMETERS
# =============================================================================

PARAMS <- list(
  
  # --- Reproducibility ---
  random_seed = 1234,
  
  # --- QC Thresholds ---
  qc = list(
    min_features = 500,              # Minimum genes per cell
    max_mt_spleen = 10,              # Max mitochondrial % for spleen
    max_mt_bonemarrow = 12,          # Max mitochondrial % for bone marrow
    min_complexity = 0.8,            # Minimum log10(genes)/log10(UMI)
    doublet_method = "scDblFinder"   # Doublet detection method
  ),
  
  # --- Normalization & Integration ---
  normalization = list(
    method = "LogNormalize",
    scale_factor = 10000,
    n_variable_features = 2000
  ),
  
  # --- Dimensionality Reduction ---
  pca = list(
    npcs = 50,                       # Number of PCs to compute
    dims_use = 1:30                  # PCs to use for downstream analysis
  ),
  
  umap = list(
    dims = 1:30,                     # PCs to use for UMAP
    seed = 1234
  ),
  
  # --- Clustering ---
  clustering = list(
    resolution = 0.5,                # Clustering resolution
    algorithm = 1,                   # 1 = Louvain
    dims = 1:30,                     # PCs to use for clustering
    seed = 1234
  ),
  
  # --- Cell Cycle Regression ---
  cell_cycle = list(
    regress = TRUE,                  # Whether to regress out cell cycle
    vars_to_regress = c("S.Score", "G2M.Score")
  ),
  
  # --- Differential Expression ---
  deg = list(
    test_use = "wilcox",             # Statistical test
    min_pct = 0.1,                   # Min fraction of cells expressing
    logfc_threshold = 0.25,          # Min log2FC threshold
    min_cells_per_group = 3,         # Min cells per condition
    padj_cutoff = 0.05               # Adjusted p-value cutoff
  ),
  
  # --- GO Enrichment ---
  go = list(
    ont = "BP",                      # Biological Process
    pvalue_cutoff = 0.05,
    qvalue_cutoff = 0.2,
    min_genes = 5                    # Min genes for enrichment
  ),
  
  # --- GSEA ---
  gsea = list(
    min_size = 10,                   # Min genes in pathway
    max_size = 500,                  # Max genes in pathway
    padj_cutoff = 0.25,
    top_n_plot = 20                  # Number of pathways to plot
  ),
  
  # --- Compositional Analysis ---
  compositional = list(
    test_method = "fisher",          # Fisher's exact test
    padj_cutoff = 0.05
  ),
  
  # --- CACOA ---
  cacoa = list(
    n_pseudo_reps = 3,               # Number of pseudo-replicates
    ref_level = "WT",
    target_level = "KO",
    n_cores = 4
  ),
  
  # --- Cross-Tissue Comparison ---
  comparison = list(
    min_common_genes = 10,           # Min genes for correlation
    correlation_method = "pearson"
  ),
  
  # --- Output ---
  output = list(
    base_dir = "results",
    save_intermediate = FALSE,       # Save intermediate objects
    figure_format = c("pdf", "png"),
    figure_dpi = 300,
    figure_width = 12,
    figure_height = 10
  )
)

# Print key parameters
cat("Key Analysis Parameters:\n")
## Key Analysis Parameters:
cat("------------------------\n")
## ------------------------
cat(sprintf("Random seed: %d\n", PARAMS$random_seed))
## Random seed: 1234
cat(sprintf("Min features: %d\n", PARAMS$qc$min_features))
## Min features: 500
cat(sprintf("Max MT%% (spleen): %d%%\n", PARAMS$qc$max_mt_spleen))
## Max MT% (spleen): 10%
cat(sprintf("Max MT%% (bone marrow): %d%%\n", PARAMS$qc$max_mt_bonemarrow))
## Max MT% (bone marrow): 12%
cat(sprintf("Clustering resolution: %.2f\n", PARAMS$clustering$resolution))
## Clustering resolution: 0.50
cat(sprintf("DEG test: %s (padj < %.3f)\n", PARAMS$deg$test_use, PARAMS$deg$padj_cutoff))
## DEG test: wilcox (padj < 0.050)
cat("\n")
# Save parameters for reproducibility
saveRDS(PARAMS, file.path(PARAMS$output$base_dir, "analysis_parameters.rds"))

cat("✓ Parameters saved to results/\n\n")
## ✓ Parameters saved to results/

3. Data Import

cat("\n=== IMPORTING DATA ===\n\n")
## 
## === IMPORTING DATA ===
# Define sample paths
sample_paths <- list(
  spl_WT = "DATA/WT_SPsample_filtered_feature_bc_matrix.h5",
  spl_KO = "DATA/KO_SP_sample_filtered_feature_bc_matrix.h5",
  bm_WT = "DATA/WT_BM_sample_filtered_feature_bc_matrix.h5",
  bm_KO = "DATA/KO_BM_sample_filtered_feature_bc_matrix.h5"
)

# Import data with error handling
import_with_check <- function(path, name) {
  tryCatch({
    data <- Read10X_h5(path)
    cat(paste0("  ✓ ", name, ": ", ncol(data), " cells\n"))
    return(data)
  }, error = function(e) {
    stop(paste0("Failed to read ", name, " from ", path, ": ", e$message))
  })
}

spl_WT <- import_with_check(sample_paths$spl_WT, "Spleen WT")
##   ✓ Spleen WT: 4055 cells
spl_KO <- import_with_check(sample_paths$spl_KO, "Spleen KO")
##   ✓ Spleen KO: 3418 cells
bm_WT <- import_with_check(sample_paths$bm_WT, "Bone Marrow WT")
##   ✓ Bone Marrow WT: 4162 cells
bm_KO <- import_with_check(sample_paths$bm_KO, "Bone Marrow KO")
##   ✓ Bone Marrow KO: 5575 cells
cat("\n✓ Data imported successfully!\n\n")
## 
## ✓ Data imported successfully!

4. QC Functions

# =============================================================================
# QC FUNCTIONS
# =============================================================================

# -----------------------------------------------------------------------------
# Run scDblFinder
# -----------------------------------------------------------------------------
run_scdblfinder <- function(seurat_obj) {
  sce <- as.SingleCellExperiment(seurat_obj)
  sce <- scDblFinder(sce)
  seurat_obj$doublet_score <- sce$scDblFinder.score
  seurat_obj$doublet_class <- sce$scDblFinder.class
  return(seurat_obj)
}

# -----------------------------------------------------------------------------
# Advanced QC filtering
# -----------------------------------------------------------------------------
apply_qc_filtering <- function(seurat_obj, 
                                sample_name = "Sample",
                                min_features = 500,
                                max_features = NULL,
                                max_count = NULL,
                                max_mt = 12,
                                min_complexity = 0.75) {
  
  cat(paste0("\n=== QC FILTERING: ", sample_name, " ===\n\n"))
  
  # Auto-detect upper limits
  if (is.null(max_features)) {
    max_features <- quantile(seurat_obj$nFeature_RNA, 0.995)
  }
  
  if (is.null(max_count)) {
    max_count <- quantile(seurat_obj$nCount_RNA, 0.995)
  }
  
  # Calculate complexity
  seurat_obj$log10GenesPerUMI <- log10(seurat_obj$nFeature_RNA) / 
                                  log10(seurat_obj$nCount_RNA)
  
  # Count failures
  n_before <- ncol(seurat_obj)
  
  fail_stats <- list(
    low_features = sum(seurat_obj$nFeature_RNA <= min_features),
    high_features = sum(seurat_obj$nFeature_RNA >= max_features),
    high_count = sum(seurat_obj$nCount_RNA >= max_count),
    high_mt = sum(seurat_obj$percent.mt >= max_mt),
    low_complexity = sum(seurat_obj$log10GenesPerUMI <= min_complexity),
    doublets = sum(seurat_obj$doublet_class == "doublet")
  )
  
  cat("Cells failing filters:\n")
  for (name in names(fail_stats)) {
    cat(sprintf("  %s: %d\n", name, fail_stats[[name]]))
  }
  
  # Apply filters
  seurat_obj <- subset(seurat_obj,
                       subset = nFeature_RNA > min_features &
                                nFeature_RNA < max_features &
                                nCount_RNA < max_count &
                                percent.mt < max_mt &
                                log10GenesPerUMI > min_complexity &
                                doublet_class == "singlet")
  
  n_after <- ncol(seurat_obj)
  n_removed <- n_before - n_after
  pct_removed <- round(100 * n_removed / n_before, 2)
  
  cat(sprintf("\nSummary:\n"))
  cat(sprintf("  Before: %d cells\n", n_before))
  cat(sprintf("  After: %d cells\n", n_after))
  cat(sprintf("  Removed: %d (%.2f%%)\n\n", n_removed, pct_removed))
  
  return(seurat_obj)
}

# -----------------------------------------------------------------------------
# QC visualization
# -----------------------------------------------------------------------------
plot_qc_summary <- function(seurat_obj, sample_name = "Sample") {
  
  metadata <- seurat_obj@meta.data
  
  # Summary statistics
  stats_df <- data.frame(
    Sample = sample_name,
    Total_cells = nrow(metadata),
    Singlets = sum(metadata$doublet_class == "singlet", na.rm = TRUE),
    Doublets = sum(metadata$doublet_class == "doublet", na.rm = TRUE),
    Median_UMI = median(metadata$nCount_RNA),
    Median_genes = median(metadata$nFeature_RNA),
    Median_MT = round(median(metadata$percent.mt), 2),
    stringsAsFactors = FALSE
  )
  
  # Violin plots
  p_violins <- VlnPlot(seurat_obj, 
                       features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
                       ncol = 4,
                       pt.size = 0.1) & 
    theme(plot.title = element_text(size = 10))
  
  # Doublet plots
  if ("doublet_class" %in% colnames(metadata)) {
    p_doublet <- ggplot(metadata, aes(x = doublet_score, fill = doublet_class)) +
      geom_histogram(bins = 50, alpha = 0.7, position = "identity") +
      scale_fill_manual(values = c("singlet" = "#2ecc71", "doublet" = "#e74c3c")) +
      theme_classic() +
      labs(title = paste0(sample_name, " - Doublet Detection"),
           x = "Doublet Score", y = "Count")
  } else {
    p_doublet <- NULL
  }
  
  return(list(
    stats = stats_df,
    violins = p_violins,
    doublet = p_doublet
  ))
}

cat("✓ QC functions loaded\n")
## ✓ QC functions loaded

5. QC Metrics and Filtering

cat("\n=== CALCULATING QC METRICS ===\n\n")
## 
## === CALCULATING QC METRICS ===
# Create Seurat objects
samples <- list(
  spl_WT = CreateSeuratObject(counts = spl_WT, project = "spl_WT"),
  spl_KO = CreateSeuratObject(counts = spl_KO, project = "spl_KO"),
  bm_WT = CreateSeuratObject(counts = bm_WT, project = "bm_WT"),
  bm_KO = CreateSeuratObject(counts = bm_KO, project = "bm_KO")
)

# Calculate QC metrics
samples <- lapply(samples, function(obj) {
  obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^mt-")
  obj[["percent.rb"]] <- PercentageFeatureSet(obj, pattern = "^Rp[sl]")
  obj <- run_scdblfinder(obj)
  return(obj)
})

# Unpack
spl_WT_seurat <- samples$spl_WT
spl_KO_seurat <- samples$spl_KO
bm_WT_seurat <- samples$bm_WT
bm_KO_seurat <- samples$bm_KO

# Generate QC summaries
qc_summaries <- list(
  spl_WT = plot_qc_summary(spl_WT_seurat, "Spleen WT"),
  spl_KO = plot_qc_summary(spl_KO_seurat, "Spleen KO"),
  bm_WT = plot_qc_summary(bm_WT_seurat, "Bone Marrow WT"),
  bm_KO = plot_qc_summary(bm_KO_seurat, "Bone Marrow KO")
)

# Combine stats
qc_summary_table <- do.call(rbind, lapply(qc_summaries, function(x) x$stats))
print(qc_summary_table)
##                Sample Total_cells Singlets Doublets Median_UMI Median_genes
## spl_WT      Spleen WT        4055     3750      305     3647.0       1716.0
## spl_KO      Spleen KO        3418     3184      234     4412.5       2035.5
## bm_WT  Bone Marrow WT        4162     3842      320     4606.5       1749.5
## bm_KO  Bone Marrow KO        5575     5011      564     5921.0       2118.0
##        Median_MT
## spl_WT      2.58
## spl_KO      2.54
## bm_WT       1.32
## bm_KO       1.52
# Save
write.csv(qc_summary_table, "results/qc_summary_statistics.csv", row.names = FALSE)

# Display plots
for (name in names(qc_summaries)) {
  print(qc_summaries[[name]]$violins)
  if (!is.null(qc_summaries[[name]]$doublet)) {
    print(qc_summaries[[name]]$doublet)
  }
}

cat("\n✓ QC metrics calculated\n")
## 
## ✓ QC metrics calculated
cat("\n=== APPLYING QC FILTERS ===\n\n")
## 
## === APPLYING QC FILTERS ===
# Apply filtering with tissue-specific parameters
spl_WT_seurat <- apply_qc_filtering(
  spl_WT_seurat,
  sample_name = "Spleen WT",
  min_features = PARAMS$qc$min_features,
  max_mt = PARAMS$qc$max_mt_spleen,
  min_complexity = PARAMS$qc$min_complexity
)
## 
## === QC FILTERING: Spleen WT ===
## 
## Cells failing filters:
##   low_features: 300
##   high_features: 21
##   high_count: 21
##   high_mt: 188
##   low_complexity: 163
##   doublets: 305
## 
## Summary:
##   Before: 4055 cells
##   After: 3226 cells
##   Removed: 829 (20.44%)
spl_KO_seurat <- apply_qc_filtering(
  spl_KO_seurat,
  sample_name = "Spleen KO",
  min_features = PARAMS$qc$min_features,
  max_mt = PARAMS$qc$max_mt_spleen,
  min_complexity = PARAMS$qc$min_complexity
)
## 
## === QC FILTERING: Spleen KO ===
## 
## Cells failing filters:
##   low_features: 164
##   high_features: 18
##   high_count: 18
##   high_mt: 150
##   low_complexity: 49
##   doublets: 234
## 
## Summary:
##   Before: 3418 cells
##   After: 2862 cells
##   Removed: 556 (16.27%)
bm_WT_seurat <- apply_qc_filtering(
  bm_WT_seurat,
  sample_name = "Bone Marrow WT",
  min_features = PARAMS$qc$min_features,
  max_mt = PARAMS$qc$max_mt_bonemarrow,
  min_complexity = PARAMS$qc$min_complexity
)
## 
## === QC FILTERING: Bone Marrow WT ===
## 
## Cells failing filters:
##   low_features: 512
##   high_features: 21
##   high_count: 21
##   high_mt: 234
##   low_complexity: 82
##   doublets: 320
## 
## Summary:
##   Before: 4162 cells
##   After: 3207 cells
##   Removed: 955 (22.95%)
bm_KO_seurat <- apply_qc_filtering(
  bm_KO_seurat,
  sample_name = "Bone Marrow KO",
  min_features = PARAMS$qc$min_features,
  max_mt = PARAMS$qc$max_mt_bonemarrow,
  min_complexity = PARAMS$qc$min_complexity
)
## 
## === QC FILTERING: Bone Marrow KO ===
## 
## Cells failing filters:
##   low_features: 405
##   high_features: 28
##   high_count: 28
##   high_mt: 207
##   low_complexity: 125
##   doublets: 564
## 
## Summary:
##   Before: 5575 cells
##   After: 4429 cells
##   Removed: 1146 (20.56%)
# Clear large objects
rm(spl_WT, spl_KO, bm_WT, bm_KO, samples)
gc()
##             used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  14586109 779.0   24567801 1312.1         NA  24567801 1312.1
## Vcells 129153872 985.4  303486608 2315.5      16384 299875187 2287.9
cat("✓ QC filtering complete\n")
## ✓ QC filtering complete

6. Analysis Functions

cat("\n=== LOADING ANALYSIS FUNCTIONS ===\n\n")
## 
## === LOADING ANALYSIS FUNCTIONS ===
# =============================================================================
# ANNOTATION
# =============================================================================

annotate_immune_cells <- function(seurat_obj) {
  cat("Running SingleR annotation...\n")
  
  DefaultAssay(seurat_obj) <- "RNA"
  if (length(Layers(seurat_obj, assay = "RNA")) > 1) {
    seurat_obj <- JoinLayers(seurat_obj, assay = "RNA")
  }
  
  sce <- as.SingleCellExperiment(seurat_obj)
  immgen_ref <- celldex::ImmGenData()
  
  singler_results <- SingleR(
    test = sce,
    ref = immgen_ref,
    labels = immgen_ref$label.fine
  )
  
  seurat_obj$SingleR_labels <- singler_results$labels
  seurat_obj$SingleR_clean <- gsub("\\s*\\([^\\)]+\\)", "", singler_results$labels)
  seurat_obj$SingleR_confidence <- apply(singler_results$scores, 1, max)
  
  cat(paste0("✓ Identified ", length(unique(seurat_obj$SingleR_clean)), " cell types\n"))
  
  return(seurat_obj)
}

# =============================================================================
# DIFFERENTIAL EXPRESSION
# =============================================================================

find_degs_per_celltype <- function(seurat_obj, 
                                    celltype_column = "SingleR_clean",
                                    condition_column = "condition", 
                                    min_cells = 3,
                                    params = PARAMS) {
  
  cat("\n=== DIFFERENTIAL EXPRESSION ANALYSIS ===\n\n")
  
  DefaultAssay(seurat_obj) <- "RNA"
  seurat_obj <- JoinLayers(seurat_obj, assay = "RNA")
  
  cell_types <- unique(seurat_obj@meta.data[[celltype_column]])
  deg_list <- list()
  
  for (ct in cell_types) {
    cells_ct <- rownames(seurat_obj@meta.data[seurat_obj@meta.data[[celltype_column]] == ct, ])
    n_wt <- sum(seurat_obj@meta.data[cells_ct, condition_column] == params$cacoa$ref_level)
    n_ko <- sum(seurat_obj@meta.data[cells_ct, condition_column] == params$cacoa$target_level)
    
    if (n_wt < min_cells | n_ko < min_cells) {
      cat(paste0("  Skipping ", ct, " (insufficient cells)\n"))
      next
    }
    
    cat(paste0("  ", ct, " (WT=", n_wt, ", KO=", n_ko, ")...\n"))
    
    seurat_subset <- subset(seurat_obj, cells = cells_ct)
    
    markers <- FindMarkers(
      seurat_subset,
      ident.1 = params$cacoa$target_level,
      ident.2 = params$cacoa$ref_level,
      group.by = condition_column,
      min.pct = params$deg$min_pct,
      logfc.threshold = params$deg$logfc_threshold,
      test.use = params$deg$test_use,
      verbose = FALSE
    )
    
    if (nrow(markers) > 0) {
      markers$gene <- rownames(markers)
      markers$cell_type <- ct
      markers$sig <- ifelse(markers$p_val_adj < params$deg$padj_cutoff, "Significant", "Not Significant")
      deg_list[[ct]] <- markers
      
      n_up <- sum(markers$avg_log2FC > 0 & markers$p_val_adj < params$deg$padj_cutoff)
      n_down <- sum(markers$avg_log2FC < 0 & markers$p_val_adj < params$deg$padj_cutoff)
      cat(paste0("    ↑ ", n_up, " | ↓ ", n_down, "\n"))
    }
  }
  
  cat("\n✓ DEG analysis complete\n\n")
  return(deg_list)
}

# =============================================================================
# GO ENRICHMENT
# =============================================================================

run_go_enrichment <- function(deg_list, params = PARAMS) {
  cat("\n=== GO ENRICHMENT ===\n\n")
  
  go_results <- list()
  
  for (ct in names(deg_list)) {
    degs <- deg_list[[ct]]
    
    up_genes <- degs$gene[degs$avg_log2FC > 0 & degs$p_val_adj < params$deg$padj_cutoff]
    down_genes <- degs$gene[degs$avg_log2FC < 0 & degs$p_val_adj < params$deg$padj_cutoff]
    
    if (length(up_genes) >= params$go$min_genes) {
      cat(paste0("  ", ct, " - UP (n=", length(up_genes), ")\n"))
      go_up <- enrichGO(
        gene = up_genes,
        OrgDb = org.Mm.eg.db,
        keyType = "SYMBOL",
        ont = params$go$ont,
        pAdjustMethod = "BH",
        pvalueCutoff = params$go$pvalue_cutoff,
        qvalueCutoff = params$go$qvalue_cutoff
      )
      if (!is.null(go_up) && nrow(as.data.frame(go_up)) > 0) {
        go_results[[paste0(ct, "_UP")]] <- go_up
      }
    }
    
    if (length(down_genes) >= params$go$min_genes) {
      cat(paste0("  ", ct, " - DOWN (n=", length(down_genes), ")\n"))
      go_down <- enrichGO(
        gene = down_genes,
        OrgDb = org.Mm.eg.db,
        keyType = "SYMBOL",
        ont = params$go$ont,
        pAdjustMethod = "BH",
        pvalueCutoff = params$go$pvalue_cutoff,
        qvalueCutoff = params$go$qvalue_cutoff
      )
      if (!is.null(go_down) && nrow(as.data.frame(go_down)) > 0) {
        go_results[[paste0(ct, "_DOWN")]] <- go_down
      }
    }
  }
  
  cat("\n✓ GO enrichment complete\n\n")
  return(go_results)
}

# =============================================================================
# GSEA
# =============================================================================

run_gsea_per_celltype <- function(deg_list, gene_sets = NULL, params = PARAMS) {
  cat("\n=== GSEA ===\n\n")
  
  if (is.null(gene_sets)) {
    m_gene_sets <- msigdbr(species = "Mus musculus", category = "H")
    m_go_sets <- msigdbr(species = "Mus musculus", category = "C5", subcategory = "GO:BP")
    gene_sets <- rbind(m_gene_sets, m_go_sets)
  }
  
  pathways <- split(gene_sets$gene_symbol, gene_sets$gs_name)
  gsea_results <- list()
  
  for (ct in names(deg_list)) {
    cat(paste0("  ", ct, "...\n"))
    
    genes <- deg_list[[ct]]
    genes <- genes[!is.na(genes$avg_log2FC) & !is.na(genes$p_val), ]
    genes$rank_metric <- genes$avg_log2FC + 
                         sign(genes$avg_log2FC) * (-log10(genes$p_val + 1e-300)) * 0.001
    
    gene_ranks <- setNames(genes$rank_metric, genes$gene)
    gene_ranks <- sort(gene_ranks, decreasing = TRUE)
    gene_ranks <- gene_ranks[!duplicated(names(gene_ranks))]
    
    tryCatch({
      fgsea_res <- fgsea(
        pathways = pathways,
        stats = gene_ranks,
        minSize = params$gsea$min_size,
        maxSize = params$gsea$max_size
      )
      fgsea_res <- fgsea_res[!is.na(fgsea_res$padj) & fgsea_res$padj < params$gsea$padj_cutoff, ]
      
      if (nrow(fgsea_res) > 0) {
        gsea_results[[ct]] <- fgsea_res
        cat(paste0("    Found ", nrow(fgsea_res), " pathways\n"))
      }
    }, error = function(e) {
      cat(paste0("    Error: ", e$message, "\n"))
    })
  }
  
  cat("\n✓ GSEA complete\n\n")
  return(gsea_results)
}

plot_gsea_results <- function(gsea_results, top_n = 20) {
  plots <- list()
  
  for (ct in names(gsea_results)) {
    gsea_df <- gsea_results[[ct]]
    if (nrow(gsea_df) == 0) next
    
    gsea_df <- gsea_df[order(abs(gsea_df$NES), decreasing = TRUE), ]
    gsea_df <- head(gsea_df, top_n)
    gsea_df$pathway_clean <- gsub("HALLMARK_|GOBP_", "", gsea_df$pathway)
    gsea_df$pathway_clean <- gsub("_", " ", gsea_df$pathway_clean)
    gsea_df$pathway_clean <- substr(gsea_df$pathway_clean, 1, 50)
    gsea_df$direction <- ifelse(gsea_df$NES > 0, "Upregulated", "Downregulated")
    
    p <- ggplot(gsea_df, aes(x = reorder(pathway_clean, NES), y = NES, fill = direction)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      scale_fill_manual(values = c("Upregulated" = "#e74c3c", "Downregulated" = "#3498db")) +
      theme_classic(base_size = 12) +
      labs(title = paste0("GSEA - ", ct), x = "Pathway", y = "NES", fill = "Direction") +
      theme(plot.title = element_text(hjust = 0.5, face = "bold"))
    
    plots[[ct]] <- p
  }
  
  return(plots)
}

cat("✓ DEG, GO, GSEA functions loaded\n")
## ✓ DEG, GO, GSEA functions loaded
# =============================================================================
# CACOA FUNCTIONS
# =============================================================================

prepare_cacoa_data <- function(seurat_obj, 
                                sample_column = "condition",
                                celltype_column = "SingleR_clean") {
  
  sample_groups <- setNames(seurat_obj@meta.data[[sample_column]], colnames(seurat_obj))
  cell_groups <- setNames(seurat_obj@meta.data[[celltype_column]], colnames(seurat_obj))
  
  DefaultAssay(seurat_obj) <- "RNA"
  if (length(Layers(seurat_obj, assay = "RNA")) > 1) {
    seurat_obj <- JoinLayers(seurat_obj, assay = "RNA")
  }
  
  expr_matrix <- GetAssayData(seurat_obj, assay = "RNA", slot = "data")
  
  return(list(
    expr_matrix = expr_matrix,
    sample_groups = sample_groups,
    cell_groups = cell_groups,
    seurat_obj = seurat_obj
  ))
}

create_pseudo_replicates <- function(seurat_obj, 
                                      condition_column = "condition",
                                      n_pseudo_reps = 3,
                                      seed = 1234) {
  set.seed(seed)
  
  conditions <- unique(seurat_obj@meta.data[[condition_column]])
  cell_to_pseudorep <- character(ncol(seurat_obj))
  names(cell_to_pseudorep) <- colnames(seurat_obj)
  
  for (cond in conditions) {
    cells_cond <- colnames(seurat_obj)[seurat_obj@meta.data[[condition_column]] == cond]
    pseudo_reps <- sample(1:n_pseudo_reps, length(cells_cond), replace = TRUE)
    cell_to_pseudorep[cells_cond] <- paste0(cond, "_rep", pseudo_reps)
  }
  
  return(cell_to_pseudorep)
}

run_cacoa_analysis <- function(seurat_obj,
                                sample_column = "condition",
                                celltype_column = "SingleR_clean",
                                params = PARAMS) {
  
  cat("\n=== CACOA ANALYSIS ===\n\n")
  
  cacoa_data <- prepare_cacoa_data(seurat_obj, sample_column, celltype_column)
  seurat_obj <- cacoa_data$seurat_obj
  
  pseudo_reps <- create_pseudo_replicates(
    seurat_obj, 
    sample_column, 
    params$cacoa$n_pseudo_reps,
    params$random_seed
  )
  
  unique_pseudo_reps <- unique(pseudo_reps)
  sample_groups <- sapply(strsplit(unique_pseudo_reps, "_rep"), `[`, 1)
  names(sample_groups) <- unique_pseudo_reps
  sample_groups <- factor(sample_groups, 
                          levels = c(params$cacoa$ref_level, params$cacoa$target_level))
  
  cat(paste0("Created ", length(unique_pseudo_reps), " pseudo-replicates\n"))
  cat(paste0("  ", params$cacoa$ref_level, ": ", 
             sum(sample_groups == params$cacoa$ref_level), " replicates\n"))
  cat(paste0("  ", params$cacoa$target_level, ": ", 
             sum(sample_groups == params$cacoa$target_level), " replicates\n\n"))
  
  embedding <- Embeddings(seurat_obj, reduction = "umap")
  DefaultAssay(seurat_obj) <- "integrated"
  graph <- seurat_obj@graphs$integrated_snn
  if (!inherits(graph, "dgCMatrix")) {
    graph <- as(graph, "dgCMatrix")
  }
  
  cao <- Cacoa$new(
    data.object = cacoa_data$expr_matrix,
    cell.groups = cacoa_data$cell_groups,
    sample.groups = sample_groups,
    sample.per.cell = pseudo_reps,
    ref.level = params$cacoa$ref_level,
    target.level = params$cacoa$target_level,
    graph = graph,
    embedding = embedding,
    n.cores = params$cacoa$n_cores,
    verbose = FALSE
  )
  
  cao$estimateExpressionShiftMagnitudes()
  cao$estimateCellLoadings()
  cao$estimateCellDensity()
  cao$estimateDiffCellDensity(type = "wilcox")
  
  cat("✓ CACOA analysis complete\n\n")
  return(cao)
}

plot_cacoa_results <- function(cao, tissue_name = "tissue") {
  plot_list <- list()
  
  tryCatch({
    plot_list$abundance_changes <- cao$plotCellLoadings(show.pvals = TRUE)
  }, error = function(e) {})
  
  tryCatch({
    plot_list$expression_shifts <- cao$plotExpressionShiftMagnitudes()
  }, error = function(e) {})
  
  tryCatch({
    cell_counts <- table(cao$cell.groups, cao$sample.per.cell)
    cell_props <- prop.table(cell_counts, margin = 2) * 100
    df_props <- as.data.frame(cell_props)
    colnames(df_props) <- c("CellType", "Sample", "Percentage")
    df_props$Condition <- cao$sample.groups[match(df_props$Sample, names(cao$sample.groups))]
    df_props$Condition <- factor(df_props$Condition, levels = c("WT", "KO"))
    
    plot_list$composition_barplot <- ggplot(df_props, aes(x = Sample, y = Percentage, fill = CellType)) +
      geom_bar(stat = "identity") +
      facet_wrap(~Condition, scales = "free_x") +
      theme_classic(base_size = 12) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
      ggtitle(paste0("Cell Type Composition - ", tissue_name))
  }, error = function(e) {})
  
  return(plot_list)
}

extract_cacoa_results_table <- function(cao) {
  cell_counts <- table(cao$cell.groups, cao$sample.groups[cao$sample.per.cell])
  cell_props <- prop.table(cell_counts, margin = 2) * 100
  
  wt_props <- cell_props[, "WT"]
  ko_props <- cell_props[, "KO"]
  log2fc <- log2((ko_props + 0.01) / (wt_props + 0.01))
  
  results <- data.frame(
    CellType = names(wt_props),
    WT_percent = as.numeric(wt_props),
    KO_percent = as.numeric(ko_props),
    Difference = as.numeric(ko_props - wt_props),
    log2FC = as.numeric(log2fc),
    stringsAsFactors = FALSE
  )
  
  results <- results[order(-results$log2FC), ]
  rownames(results) <- NULL
  
  return(results)
}

export_cacoa_results <- function(cao, cacoa_plots, results_table, 
                                  output_dir = "results/cacoa", 
                                  prefix = "tissue") {
  
  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  
  write.csv(results_table, 
            file.path(output_dir, paste0(prefix, "_cacoa_results.csv")),
            row.names = FALSE)
  
  pdf(file.path(output_dir, paste0(prefix, "_cacoa_plots.pdf")), 
      width = 12, height = 10)
  
  if (!is.null(cacoa_plots$abundance_changes)) print(cacoa_plots$abundance_changes)
  if (!is.null(cacoa_plots$expression_shifts)) print(cacoa_plots$expression_shifts)
  if (!is.null(cacoa_plots$composition_barplot)) print(cacoa_plots$composition_barplot)
  
  dev.off()
  
  saveRDS(cao, file.path(output_dir, paste0(prefix, "_cacoa_object.rds")))
}

# =============================================================================
# COMPOSITIONAL ANALYSIS
# =============================================================================

run_compositional_analysis <- function(seurat_obj, 
                                        celltype_column = "SingleR_clean",
                                        condition_column = "condition",
                                        params = PARAMS) {
  
  counts <- table(seurat_obj@meta.data[[celltype_column]],
                  seurat_obj@meta.data[[condition_column]])
  
  results <- data.frame(
    CellType = rownames(counts),
    WT_count = counts[, params$cacoa$ref_level],
    KO_count = counts[, params$cacoa$target_level],
    WT_percent = (counts[, params$cacoa$ref_level] / sum(counts[, params$cacoa$ref_level])) * 100,
    KO_percent = (counts[, params$cacoa$target_level] / sum(counts[, params$cacoa$target_level])) * 100,
    log2FC = log2((counts[, params$cacoa$target_level] + 1) / (counts[, params$cacoa$ref_level] + 1)),
    p_value = NA,
    p_adj = NA,
    stringsAsFactors = FALSE
  )
  
  for (i in 1:nrow(results)) {
    ct_count <- c(results$WT_count[i], results$KO_count[i])
    other_count <- c(sum(results$WT_count[-i]), sum(results$KO_count[-i]))
    contingency <- matrix(c(ct_count, other_count), nrow = 2, byrow = TRUE)
    fisher_result <- fisher.test(contingency)
    results$p_value[i] <- fisher_result$p.value
  }
  
  results$p_adj <- p.adjust(results$p_value, method = "BH")
  results <- results[order(results$p_adj), ]
  
  return(list(results = results, counts = counts))
}

plot_compositional_results <- function(comp_results, tissue_name = "tissue") {
  results <- comp_results$results
  counts <- comp_results$counts
  
  # Volcano plot
  results$sig <- ifelse(results$p_adj < 0.05, "Significant", "Not Significant")
  
  p_volcano <- ggplot(results, aes(x = log2FC, y = -log10(p_adj), color = sig, label = CellType)) +
    geom_point(size = 3, alpha = 0.7) +
    geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
    scale_color_manual(values = c("Significant" = "#e74c3c", "Not Significant" = "grey")) +
    geom_text_repel(data = subset(results, p_adj < 0.05), size = 3) +
    theme_classic(base_size = 14) +
    labs(title = paste0("Compositional Changes - ", tissue_name),
         x = "Log2 Fold Change (KO/WT)",
         y = "-log10(Adjusted p-value)") +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"))
  
  # Alluvial plot
  props <- prop.table(counts, margin = 2) * 100
  props_df <- as.data.frame(props)
  colnames(props_df) <- c("CellType", "Condition", "Proportion")
  props_df$Condition <- factor(props_df$Condition, levels = c("WT", "KO"))
  
  n_celltypes <- length(unique(props_df$CellType))
  colors <- colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))(n_celltypes)
  names(colors) <- unique(props_df$CellType)
  
  p_alluvial <- ggplot(props_df,
                       aes(x = Condition, stratum = CellType, alluvium = CellType,
                           y = Proportion, fill = CellType)) +
    geom_alluvium(alpha = 0.6, knot.pos = 0.4) +
    geom_stratum(alpha = 0.9, color = "grey70", width = 0.3) +
    scale_fill_manual(values = colors, name = "Cell Type") +
    labs(x = "", y = "Cell Proportion (%)",
         title = paste0("Flow of Cell Type Proportions - ", tissue_name)) +
    theme_minimal(base_size = 14) +
    theme(panel.grid = element_blank(),
          plot.title = element_text(face = "bold", hjust = 0.5))
  
  return(list(volcano = p_volcano, alluvial = p_alluvial))
}

cat("✓ CACOA and Compositional analysis functions loaded\n")
## ✓ CACOA and Compositional analysis functions loaded
# =============================================================================
# COMPOSITIONAL DATA ANALYSIS (CLR METHOD)
# =============================================================================

compositional_analysis_proper <- function(seurat_obj, 
                                           celltype_column = "SingleR_clean",
                                           condition_column = "condition",
                                           params = PARAMS) {
  
  cat("\n=== COMPOSITIONAL DATA ANALYSIS (CLR METHOD) ===\n")
  cat("Using Centered Log-Ratio transformation to account for compositional constraint\n\n")
  
  # Get counts
  counts <- table(seurat_obj@meta.data[[celltype_column]],
                  seurat_obj@meta.data[[condition_column]])
  
  cat(paste0("Analyzing ", nrow(counts), " cell types\n"))
  cat(paste0("Conditions: ", paste(colnames(counts), collapse = " vs "), "\n\n"))
  
  # Convert to proportions
  props <- prop.table(counts, margin = 2)
  
  # CLR transformation function
  clr_transform <- function(x) {
    # Add small pseudocount to avoid log(0)
    x_pseudo <- x + 1e-6
    # CLR: log(x) - mean(log(x))
    log(x_pseudo) - mean(log(x_pseudo))
  }
  
  # Apply CLR to each condition
  clr_wt <- clr_transform(props[, params$cacoa$ref_level])
  clr_ko <- clr_transform(props[, params$cacoa$target_level])
  
  # Calculate difference in CLR space
  clr_diff <- clr_ko - clr_wt
  
  # Create results dataframe
  results <- data.frame(
    CellType = names(clr_diff),
    WT_percent = props[names(clr_diff), params$cacoa$ref_level] * 100,
    KO_percent = props[names(clr_diff), params$cacoa$target_level] * 100,
    WT_count = counts[names(clr_diff), params$cacoa$ref_level],
    KO_count = counts[names(clr_diff), params$cacoa$target_level],
    CLR_WT = clr_wt,
    CLR_KO = clr_ko,
    CLR_difference = clr_diff,
    stringsAsFactors = FALSE
  )
  
  # Add interpretation based on CLR difference magnitude
  results$Interpretation <- case_when(
    abs(results$CLR_difference) > 1.0 ~ "Large change",
    abs(results$CLR_difference) > 0.5 ~ "Moderate change",
    abs(results$CLR_difference) > 0.2 ~ "Small change",
    TRUE ~ "Negligible"
  )
  
  results$Direction <- ifelse(results$CLR_difference > 0, "Enriched in KO", "Depleted in KO")
  
  # Sort by magnitude of change
  results <- results[order(-abs(results$CLR_difference)), ]
  rownames(results) <- NULL
  
  # Print summary
  cat("Compositional changes summary:\n")
  cat("─────────────────────────────\n")
  cat(sprintf("Large changes (|CLR| > 1.0): %d\n", 
              sum(abs(results$CLR_difference) > 1.0)))
  cat(sprintf("Moderate changes (|CLR| > 0.5): %d\n", 
              sum(abs(results$CLR_difference) > 0.5)))
  cat(sprintf("Small changes (|CLR| > 0.2): %d\n\n", 
              sum(abs(results$CLR_difference) > 0.2)))
  
  # Print top changes
  cat("Top compositional changes:\n")
  print(results[1:min(10, nrow(results)), 
                c("CellType", "WT_percent", "KO_percent", 
                  "CLR_difference", "Interpretation")])
  cat("\n")
  
  return(list(
    results = results,
    counts = counts,
    props = props
  ))
}

# -----------------------------------------------------------------------------
# VISUALIZATION FOR CLR RESULTS
# -----------------------------------------------------------------------------

plot_clr_compositional <- function(clr_results, tissue_name = "tissue") {
  
  results <- clr_results$results
  
  # Assign colors based on magnitude and direction
  results$Color <- case_when(
    results$CLR_difference > 1.0 ~ "Strong enrichment",
    results$CLR_difference > 0.5 ~ "Moderate enrichment",
    results$CLR_difference > 0 ~ "Mild enrichment",
    results$CLR_difference > -0.5 ~ "Mild depletion",
    results$CLR_difference > -1.0 ~ "Moderate depletion",
    TRUE ~ "Strong depletion"
  )
  
  results$Color <- factor(results$Color, 
                         levels = c("Strong enrichment", "Moderate enrichment", 
                                   "Mild enrichment", "Mild depletion", 
                                   "Moderate depletion", "Strong depletion"))
  
  # Plot 1: CLR difference barplot
  p_clr <- ggplot(results, aes(x = reorder(CellType, CLR_difference), 
                                y = CLR_difference,
                                fill = Color)) +
    geom_bar(stat = "identity") +
    geom_hline(yintercept = c(-1, -0.5, 0, 0.5, 1), 
               linetype = "dashed", color = "grey50", alpha = 0.5) +
    coord_flip() +
    scale_fill_manual(
      values = c(
        "Strong enrichment" = "#d62728",
        "Moderate enrichment" = "#ff7f0e", 
        "Mild enrichment" = "#ffbb78",
        "Mild depletion" = "#aec7e8",
        "Moderate depletion" = "#1f77b4",
        "Strong depletion" = "#17426b"
      ),
      name = "Change Magnitude"
    ) +
    theme_classic(base_size = 12) +
    labs(
      title = paste0("Compositional Changes (CLR) - ", tissue_name),
      subtitle = "Centered Log-Ratio accounts for compositional constraint",
      x = "Cell Type",
      y = "CLR Difference (KO - WT)",
      caption = "|CLR| > 1.0 = Large change | |CLR| > 0.5 = Moderate change"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 10),
      plot.caption = element_text(hjust = 0.5, size = 9, color = "grey50"),
      legend.position = "right"
    )
  
  # Plot 2: CLR space visualization (WT vs KO)
  p_scatter <- ggplot(results, aes(x = CLR_WT, y = CLR_KO, 
                                    label = CellType, color = Color)) +
    geom_point(size = 3, alpha = 0.7) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey50") +
    geom_text_repel(size = 3, max.overlaps = 15) +
    scale_color_manual(
      values = c(
        "Strong enrichment" = "#d62728",
        "Moderate enrichment" = "#ff7f0e",
        "Mild enrichment" = "#ffbb78",
        "Mild depletion" = "#aec7e8",
        "Moderate depletion" = "#1f77b4",
        "Strong depletion" = "#17426b"
      ),
      name = "Change"
    ) +
    theme_classic(base_size = 12) +
    labs(
      title = paste0("CLR Space Comparison - ", tissue_name),
      subtitle = "Points above diagonal = enriched in KO | Points below = depleted in KO",
      x = "CLR (WT)",
      y = "CLR (KO)"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 10)
    )
  
  # Plot 3: Proportions comparison (for reference)
  props <- clr_results$props
  props_df <- as.data.frame(props * 100)
  props_df$CellType <- rownames(props_df)
  props_long <- reshape2::melt(props_df, id.vars = "CellType",
                               variable.name = "Condition", 
                               value.name = "Percentage")
  
  p_props <- ggplot(props_long, aes(x = CellType, y = Percentage, 
                                    fill = Condition)) +
    geom_bar(stat = "identity", position = "dodge") +
    coord_flip() +
    scale_fill_manual(values = c("WT" = "#3498db", "KO" = "#e74c3c")) +
    theme_classic(base_size = 12) +
    labs(
      title = paste0("Cell Type Proportions - ", tissue_name),
      subtitle = "Raw proportions (for reference)",
      x = "Cell Type",
      y = "Percentage (%)"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      legend.position = "top"
    )
  
  return(list(
    clr_barplot = p_clr,
    clr_scatter = p_scatter,
    proportions = p_props
  ))
}

cat("✓ CLR compositional analysis functions loaded\n")
## ✓ CLR compositional analysis functions loaded
# =============================================================================
# EXPORT FUNCTIONS
# =============================================================================

export_tissue_results <- function(seurat_obj, 
                                   deg_list, 
                                   go_results, 
                                   gsea_results,
                                   comp_results, 
                                   clr_results = NULL,
                                   tissue_name = "tissue",
                                   output_dir = "results") {
  
  # ADD ERROR CHECKING
  if (is.null(tissue_name) || tissue_name == "") {
    stop("tissue_name cannot be NULL or empty")
  }
  
  if (is.null(output_dir) || output_dir == "") {
    stop("output_dir cannot be NULL or empty")
  }
  
  cat(paste0("Exporting results for: ", tissue_name, "\n"))
  cat(paste0("Output directory: ", output_dir, "\n"))
  
  tissue_lower <- tolower(gsub(" ", "", tissue_name))
  tissue_dir <- file.path(output_dir, tissue_lower)
  
  cat(paste0("Creating directory: ", tissue_dir, "\n"))
  
  dir.create(file.path(tissue_dir, "DEG"), recursive = TRUE, showWarnings = FALSE)
  dir.create(file.path(tissue_dir, "GO"), recursive = TRUE, showWarnings = FALSE)
  dir.create(file.path(tissue_dir, "GSEA"), recursive = TRUE, showWarnings = FALSE)
  
  
  # Export DEGs
  for (ct in names(deg_list)) {
    filename <- paste0("DEG_", gsub("[/ ]", "_", ct), "_KO_vs_WT.csv")
    write.csv(deg_list[[ct]], file.path(tissue_dir, "DEG", filename), row.names = FALSE)
  }
  
  # Export GO
  if (!is.null(go_results) && length(go_results) > 0) {
    for (result_name in names(go_results)) {
      filename <- paste0("GO_", gsub("[/ ]", "_", result_name), ".csv")
      go_df <- if (class(go_results[[result_name]])[1] == "enrichResult") {
        as.data.frame(go_results[[result_name]])
      } else {
        go_results[[result_name]]
      }
      write.csv(go_df, file.path(tissue_dir, "GO", filename), row.names = FALSE)
    }
  }
  
  # Export GSEA
  if (!is.null(gsea_results) && length(gsea_results) > 0) {
    for (ct in names(gsea_results)) {
      filename <- paste0("GSEA_", gsub("[/ ]", "_", ct), ".csv")
      gsea_df <- as.data.frame(gsea_results[[ct]])
      
      if ("leadingEdge" %in% colnames(gsea_df)) {
        gsea_df$leadingEdge <- sapply(gsea_df$leadingEdge, function(x) {
          paste(unlist(x), collapse = ";")
        })
      }
      
      list_cols <- sapply(gsea_df, is.list)
      if (any(list_cols)) {
        for (col in names(list_cols)[list_cols]) {
          gsea_df[[col]] <- sapply(gsea_df[[col]], function(x) {
            paste(unlist(x), collapse = ";")
          })
        }
      }
      
      write.csv(gsea_df, file.path(tissue_dir, "GSEA", filename), row.names = FALSE)
    }
  }
  
  # Export CLR compositional analysis
  if (!is.null(clr_results)) {
    write.csv(clr_results$results,
              file.path(tissue_dir, paste0(tissue_lower, "_compositional_CLR.csv")),
              row.names = FALSE)
  }
  
  # Export compositional
  write.csv(comp_results$results, 
            file.path(tissue_dir, paste0(tissue_lower, "_compositional_analysis.csv")),
            row.names = FALSE)
  
  # Save Seurat object
  saveRDS(seurat_obj, 
          file.path(tissue_dir, paste0(tissue_lower, "_integrated_annotated.rds")))
}


# =============================================================================
# MASTER WRAPPER FUNCTION - ANALYZES ONE TISSUE END-TO-END
# =============================================================================

analyze_tissue_complete <- function(
  wt_seurat,
  ko_seurat,
  tissue_name,
  output_dir = "results",
  params = PARAMS,
  s_genes = NULL,
  g2m_genes = NULL,
  verbose = TRUE
) {
  
  if (verbose) {
    cat("\n")
    cat("=============================================================================\n")
    cat(paste0("  ANALYZING: ", tissue_name, "\n"))
    cat("=============================================================================\n\n")
  }
  
  tissue_dir <- file.path(output_dir, tolower(gsub(" ", "", tissue_name)))
  dir.create(tissue_dir, recursive = TRUE, showWarnings = FALSE)
  
  # Step 1: Preprocessing
  if (verbose) cat("STEP 1/9: Preprocessing...\n")
  
  wt_seurat$condition <- params$cacoa$ref_level
  ko_seurat$condition <- params$cacoa$target_level
  
  combined <- merge(wt_seurat, ko_seurat,
                    add.cell.ids = c("WT", "KO"),
                    project = tissue_name)
  
  if (verbose) cat(paste0("  Merged: ", ncol(combined), " cells\n"))
  
  # Step 2: Normalization
  if (verbose) cat("STEP 2/9: Normalization...\n")
  
  combined <- NormalizeData(combined,
                            normalization.method = params$normalization$method,
                            scale.factor = params$normalization$scale_factor)
  combined <- FindVariableFeatures(combined, 
                                   nfeatures = params$normalization$n_variable_features)
  
  # Step 3: Cell Cycle
  if (verbose) cat("STEP 3/9: Cell cycle scoring...\n")
  
  if (!is.null(s_genes) && !is.null(g2m_genes)) {
    combined <- CellCycleScoring(combined, 
                                 s.features = s_genes,
                                 g2m.features = g2m_genes)
    
    if (params$cell_cycle$regress) {
      combined <- ScaleData(combined, 
                           vars.to.regress = params$cell_cycle$vars_to_regress)
    }
  } else {
    combined <- ScaleData(combined)
  }
  
  # Step 4: Integration
  if (verbose) cat("STEP 4/9: Integration...\n")
  
  sample_list <- SplitObject(combined, split.by = "condition")
  anchors <- FindIntegrationAnchors(object.list = sample_list, 
                                    dims = params$pca$dims_use)
  integrated <- IntegrateData(anchorset = anchors, 
                              dims = params$pca$dims_use)
  
  # Step 5: Dimensionality Reduction & Clustering
  if (verbose) cat("STEP 5/9: PCA, UMAP, clustering...\n")
  
  integrated <- ScaleData(integrated)
  integrated <- RunPCA(integrated, 
                       npcs = params$pca$npcs,
                       seed.use = params$random_seed)
  integrated <- RunUMAP(integrated, 
                        dims = params$umap$dims,
                        seed.use = params$umap$seed)
  integrated <- FindNeighbors(integrated, 
                              dims = params$clustering$dims)
  integrated <- FindClusters(integrated, 
                             resolution = params$clustering$resolution,
                             random.seed = params$clustering$seed)
  
  if (verbose) cat(paste0("  Found ", length(unique(integrated$seurat_clusters)), " clusters\n"))
  
  # Step 6: Annotation
  if (verbose) cat("STEP 6/9: Cell type annotation...\n")
  
  integrated <- annotate_immune_cells(integrated)
  
  # Step 7: DEG, GO, GSEA
  if (verbose) cat("STEP 7/9: DEG, GO, GSEA...\n")
  
  deg_results <- find_degs_per_celltype(integrated, 
                                        min_cells = params$deg$min_cells_per_group,
                                        params = params)
  go_results <- run_go_enrichment(deg_results, params = params)
  gsea_results <- run_gsea_per_celltype(deg_results, params = params)
  gsea_plots <- plot_gsea_results(gsea_results, top_n = params$gsea$top_n_plot)
  
  # Step 8: Compositional
  if (verbose) cat("STEP 8/9: Compositional analysis...\n")
  
  comp_results <- run_compositional_analysis(integrated, params = params)
  comp_plots <- plot_compositional_results(comp_results, tissue_name)
  
  if (verbose) cat("STEP 8b/9: CLR compositional analysis...\n")

  clr_results <- compositional_analysis_proper(
    integrated,
    celltype_column = "SingleR_clean",
    condition_column = "condition",
    params = params
  )
  
  clr_plots <- plot_clr_compositional(clr_results, tissue_name)
  # Step 9: CACOA
  if (verbose) cat("STEP 9/9: CACOA...\n")
  
  integrated$condition <- factor(integrated$condition, 
                                 levels = c(params$cacoa$ref_level, params$cacoa$target_level))
  
  cao <- run_cacoa_analysis(integrated, params = params)
  cacoa_plots <- plot_cacoa_results(cao, tissue_name)
  cacoa_results_table <- extract_cacoa_results_table(cao)
  
  # Export
  if (verbose) cat("\nExporting results...\n")
  
  export_tissue_results(integrated, deg_results, go_results, gsea_results,
                      comp_results, clr_results,
                      tissue_name, output_dir)

  stopifnot(
  is.character(output_dir),
  length(output_dir) == 1,
  !is.na(output_dir),
  nchar(output_dir) > 0)
  
    
  export_cacoa_results(cao, cacoa_plots, cacoa_results_table,
                       file.path(output_dir, "cacoa"),
                       tolower(gsub(" ", "", tissue_name)))
  
  # Save compositional plots
  pdf(file.path(tissue_dir, paste0(tolower(gsub(" ", "", tissue_name)), 
                                    "_compositional_plots.pdf")), 
      width = 12, height = 10)
  print(comp_plots$volcano)
  print(comp_plots$alluvial)
  dev.off()
  
  # Save CLR plots
  pdf(file.path(tissue_dir, paste0(tolower(gsub(" ", "", tissue_name)), 
                                    "_compositional_CLR_plots.pdf")), 
      width = 12, height = 10)
  print(clr_plots$clr_barplot)
  print(clr_plots$clr_scatter)
  dev.off()
  
  # Save GSEA plots
  pdf(file.path(tissue_dir, paste0(tolower(gsub(" ", "", tissue_name)), 
                                    "_gsea_plots.pdf")), 
      width = 12, height = 10)
  for (ct in names(gsea_plots)) {
    print(gsea_plots[[ct]])
  }
  dev.off()
  
  # Return everything
  results <- list(
    seurat = integrated,
    deg = deg_results,
    go = go_results,
    gsea = gsea_results,
    gsea_plots = gsea_plots,
    compositional = comp_results,
    compositional_plots = comp_plots,
    clr_compositional = clr_results,
    clr_plots = clr_plots,
    cacoa = cao,
    cacoa_plots = cacoa_plots,
    cacoa_results = cacoa_results_table,
    params = params,
    tissue_name = tissue_name,
    output_dir = tissue_dir
  )
  
  saveRDS(results, file.path(tissue_dir, paste0(tolower(gsub(" ", "", tissue_name)), 
                                                 "_complete_results.rds")))
  
  if (verbose) {
    cat("\n")
    cat("=============================================================================\n")
    cat(paste0("  ✓ ", tissue_name, " ANALYSIS COMPLETE\n"))
    cat("=============================================================================\n\n")
  }
  
  return(results)
}

cat("\n✓ All analysis functions loaded (including master wrapper)\n")
## 
## ✓ All analysis functions loaded (including master wrapper)

7. Cell Cycle Genes

cat("\n=== PREPARING CELL CYCLE GENES ===\n\n")
## 
## === PREPARING CELL CYCLE GENES ===
# Get mouse cell cycle genes
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

# Convert to mouse format (Title case)
s.genes <- str_to_title(tolower(s.genes))
g2m.genes <- str_to_title(tolower(g2m.genes))

# Filter for genes present in data
s.genes <- intersect(s.genes, rownames(spl_WT_seurat))
g2m.genes <- intersect(g2m.genes, rownames(spl_WT_seurat))

cat(paste0("S phase genes: ", length(s.genes), "\n"))
## S phase genes: 42
cat(paste0("G2M phase genes: ", length(g2m.genes), "\n\n"))
## G2M phase genes: 52

8. Spleen Analysis

cat("\n=== ANALYZING SPLEEN ===\n\n")
## 
## === ANALYZING SPLEEN ===
# Run complete analysis with wrapper function
results_spleen <- analyze_tissue_complete(
  wt_seurat = spl_WT_seurat,
  ko_seurat = spl_KO_seurat,
  tissue_name = "Spleen",
  output_dir = PARAMS$output$base_dir,
  params = PARAMS,
  s_genes = s.genes,
  g2m_genes = g2m.genes,
  verbose = TRUE
)
## 
## =============================================================================
##   ANALYZING: Spleen
## =============================================================================
## 
## STEP 1/9: Preprocessing...
##   Merged: 6088 cells
## STEP 2/9: Normalization...
## STEP 3/9: Cell cycle scoring...
## STEP 4/9: Integration...
## STEP 5/9: PCA, UMAP, clustering...
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6088
## Number of edges: 225221
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9363
## Number of communities: 21
## Elapsed time: 0 seconds
##   Found 21 clusters
## STEP 6/9: Cell type annotation...
## Running SingleR annotation...
## ✓ Identified 14 cell types
## STEP 7/9: DEG, GO, GSEA...
## 
## === DIFFERENTIAL EXPRESSION ANALYSIS ===
## 
##   NKT (WT=65, KO=35)...
##     ↑ 1 | ↓ 0
##   Stem cells (WT=69, KO=37)...
##     ↑ 1 | ↓ 0
##   T cells (WT=971, KO=1182)...
##     ↑ 817 | ↓ 123
##   B cells (WT=1091, KO=1124)...
##     ↑ 736 | ↓ 101
##   Neutrophils (WT=527, KO=164)...
##     ↑ 8 | ↓ 13
##   DC (WT=78, KO=48)...
##     ↑ 0 | ↓ 0
##   NK cells (WT=67, KO=42)...
##     ↑ 4 | ↓ 0
##   Macrophages (WT=167, KO=52)...
##     ↑ 3 | ↓ 0
##   Basophils (WT=9, KO=6)...
##     ↑ 0 | ↓ 0
##   Tgd (WT=26, KO=5)...
##     ↑ 0 | ↓ 0
##   Monocytes (WT=140, KO=162)...
##     ↑ 8 | ↓ 10
##   Skipping Eosinophils (insufficient cells)
##   ILC (WT=15, KO=4)...
##     ↑ 0 | ↓ 0
##   Skipping B cells, pro (insufficient cells)
## 
## ✓ DEG analysis complete
## 
## 
## === GO ENRICHMENT ===
## 
##   T cells - UP (n=817)
##   T cells - DOWN (n=123)
##   B cells - UP (n=736)
##   B cells - DOWN (n=101)
##   Neutrophils - UP (n=8)
##   Neutrophils - DOWN (n=13)
##   Monocytes - UP (n=8)
##   Monocytes - DOWN (n=10)
## 
## ✓ GO enrichment complete
## 
## 
## === GSEA ===
##   NKT...
##     Found 1 pathways
##   Stem cells...
##     Found 497 pathways
##   T cells...
##     Found 68 pathways
##   B cells...
##     Found 280 pathways
##   Neutrophils...
##   DC...
##   NK cells...
##     Found 5 pathways
##   Macrophages...
##     Found 36 pathways
##   Basophils...
##     Found 7 pathways
##   Tgd...
##     Found 17 pathways
##   Monocytes...
##     Found 98 pathways
##   ILC...
##     Found 11 pathways
## 
## ✓ GSEA complete
## 
## STEP 8/9: Compositional analysis...
## STEP 8b/9: CLR compositional analysis...
## 
## === COMPOSITIONAL DATA ANALYSIS (CLR METHOD) ===
## Using Centered Log-Ratio transformation to account for compositional constraint
## 
## Analyzing 14 cell types
## Conditions: KO vs WT
## 
## Compositional changes summary:
## ─────────────────────────────
## Large changes (|CLR| > 1.0): 3
## Moderate changes (|CLR| > 0.5): 9
## Small changes (|CLR| > 0.2): 9
## 
## Top compositional changes:
##        CellType  WT_percent KO_percent CLR_difference  Interpretation
## 1  B cells, pro  0.00000000  0.0349406      6.2858852    Large change
## 2   Eosinophils  0.03099814  0.0000000     -5.3129404    Large change
## 3           Tgd  0.80595164  0.1747030     -1.1016955    Large change
## 4           ILC  0.46497210  0.1397624     -0.7747407 Moderate change
## 5       T cells 30.09919405 41.2997904      0.7431508 Moderate change
## 6     Monocytes  4.33973962  5.6603774      0.6924635 Moderate change
## 7   Neutrophils 16.33601984  5.7302586     -0.6208078 Moderate change
## 8   Macrophages  5.17668940  1.8169113     -0.6201994 Moderate change
## 9       B cells 33.81897086 39.2732355      0.5763136 Moderate change
## 10    Basophils  0.27898326  0.2096436      0.1411684      Negligible
## STEP 9/9: CACOA...
## 
## === CACOA ANALYSIS ===
## Created 6 pseudo-replicates
##   WT: 3 replicates
##   KO: 3 replicates
## ✓ CACOA analysis complete
## 
## 
## Exporting results...
## Exporting results for: Spleen
## Output directory: results
## Creating directory: results/spleen
## 
## =============================================================================
##   ✓ Spleen ANALYSIS COMPLETE
## =============================================================================
# Extract objects for downstream use
spl_integrated <- results_spleen$seurat
deg_results_spl <- results_spleen$deg
go_results_spl <- results_spleen$go
gsea_results_spl <- results_spleen$gsea
gsea_plots_spl <- results_spleen$gsea_plots
comp_results_spl <- results_spleen$compositional
comp_plots_spl <- results_spleen$compositional_plots
clr_results_spl <- results_spleen$clr_compositional
clr_plots_spl <- results_spleen$clr_plots

cat("\n✓ Spleen analysis complete\n")
## 
## ✓ Spleen analysis complete
cat(paste0("  Cells: ", ncol(spl_integrated), "\n"))
##   Cells: 6088
cat(paste0("  Cell types: ", length(unique(spl_integrated$SingleR_clean)), "\n"))
##   Cell types: 14
cat(paste0("  DEG cell types: ", length(deg_results_spl), "\n"))
##   DEG cell types: 12
cat(paste0("  Results: ", results_spleen$output_dir, "\n\n"))
##   Results: results/spleen
# Display key plots
print(DimPlot(spl_integrated, group.by = "condition", 
              cols = c("WT" = "#3498db", "KO" = "#e74c3c")) +
  ggtitle("Spleen - WT vs KO"))

print(DimPlot(spl_integrated, group.by = "SingleR_clean", label = FALSE) +
  ggtitle("Spleen - Cell Type Annotation"))

if (!is.null(comp_plots_spl$volcano)) {
  print(comp_plots_spl$volcano)
}

if (!is.null(comp_plots_spl$alluvial)) {
  print(comp_plots_spl$alluvial)
}

if (!is.null(clr_plots_spl$clr_barplot)) {
  print(clr_plots_spl$clr_barplot)
}

if (!is.null(clr_plots_spl$clr_scatter)) {
  print(clr_plots_spl$clr_scatter)
}

9. Bone Marrow Analysis

cat("\n=== ANALYZING BONE MARROW ===\n\n")
## 
## === ANALYZING BONE MARROW ===
# Run complete analysis with wrapper function
results_bonemarrow <- analyze_tissue_complete(
  wt_seurat = bm_WT_seurat,
  ko_seurat = bm_KO_seurat,
  tissue_name = "BoneMarrow",
  output_dir = PARAMS$output$base_dir,
  params = PARAMS,
  s_genes = s.genes,
  g2m_genes = g2m.genes,
  verbose = TRUE
)
## 
## =============================================================================
##   ANALYZING: BoneMarrow
## =============================================================================
## 
## STEP 1/9: Preprocessing...
##   Merged: 7636 cells
## STEP 2/9: Normalization...
## STEP 3/9: Cell cycle scoring...
## STEP 4/9: Integration...
## STEP 5/9: PCA, UMAP, clustering...
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7636
## Number of edges: 296773
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9364
## Number of communities: 25
## Elapsed time: 0 seconds
##   Found 25 clusters
## STEP 6/9: Cell type annotation...
## Running SingleR annotation...
## ✓ Identified 17 cell types
## STEP 7/9: DEG, GO, GSEA...
## 
## === DIFFERENTIAL EXPRESSION ANALYSIS ===
## 
##   Macrophages (WT=97, KO=189)...
##     ↑ 13 | ↓ 18
##   Neutrophils (WT=1590, KO=1670)...
##     ↑ 258 | ↓ 80
##   Stem cells (WT=494, KO=785)...
##     ↑ 359 | ↓ 57
##   B cells (WT=356, KO=582)...
##     ↑ 876 | ↓ 149
##   Monocytes (WT=341, KO=481)...
##     ↑ 91 | ↓ 68
##   T cells (WT=175, KO=380)...
##     ↑ 73 | ↓ 64
##   Skipping Tgd (insufficient cells)
##   NK cells (WT=26, KO=65)...
##     ↑ 1 | ↓ 8
##   ILC (WT=6, KO=16)...
##     ↑ 0 | ↓ 0
##   Basophils (WT=16, KO=21)...
##     ↑ 0 | ↓ 2
##   NKT (WT=33, KO=32)...
##     ↑ 0 | ↓ 5
##   DC (WT=40, KO=142)...
##     ↑ 3 | ↓ 13
##   Fibroblasts (WT=9, KO=34)...
##     ↑ 0 | ↓ 4
##   B cells, pro (WT=4, KO=5)...
##     ↑ 0 | ↓ 0
##   Skipping Endothelial cells (insufficient cells)
##   Stromal cells (WT=3, KO=8)...
##     ↑ 0 | ↓ 0
##   Skipping Eosinophils (insufficient cells)
## 
## ✓ DEG analysis complete
## 
## 
## === GO ENRICHMENT ===
## 
##   Macrophages - UP (n=13)
##   Macrophages - DOWN (n=18)
##   Neutrophils - UP (n=258)
##   Neutrophils - DOWN (n=80)
##   Stem cells - UP (n=359)
##   Stem cells - DOWN (n=57)
##   B cells - UP (n=876)
##   B cells - DOWN (n=149)
##   Monocytes - UP (n=91)
##   Monocytes - DOWN (n=68)
##   T cells - UP (n=73)
##   T cells - DOWN (n=64)
##   NK cells - DOWN (n=8)
##   NKT - DOWN (n=5)
##   DC - DOWN (n=13)
## 
## ✓ GO enrichment complete
## 
## 
## === GSEA ===
## 
##   Macrophages...
##     Found 186 pathways
##   Neutrophils...
##     Found 56 pathways
##   Stem cells...
##     Found 97 pathways
##   B cells...
##     Found 288 pathways
##   Monocytes...
##     Found 78 pathways
##   T cells...
##     Found 49 pathways
##   NK cells...
##   ILC...
##     Found 9 pathways
##   Basophils...
##   NKT...
##   DC...
##     Found 138 pathways
##   Fibroblasts...
##     Found 12 pathways
##   B cells, pro...
##     Found 237 pathways
##   Stromal cells...
## 
## ✓ GSEA complete
## 
## STEP 8/9: Compositional analysis...
## STEP 8b/9: CLR compositional analysis...
## 
## === COMPOSITIONAL DATA ANALYSIS (CLR METHOD) ===
## Using Centered Log-Ratio transformation to account for compositional constraint
## 
## Analyzing 17 cell types
## Conditions: KO vs WT
## 
## Compositional changes summary:
## ─────────────────────────────
## Large changes (|CLR| > 1.0): 2
## Moderate changes (|CLR| > 0.5): 7
## Small changes (|CLR| > 0.2): 14
## 
## Top compositional changes:
##             CellType  WT_percent  KO_percent CLR_difference  Interpretation
## 1                Tgd  0.46772685  0.04515692     -2.5708761    Large change
## 2  Endothelial cells  0.03118179  0.36125536      2.2116923    Large change
## 3        Fibroblasts  0.28063611  0.76766765      0.7709386 Moderate change
## 4                 DC  1.24727159  3.20614134      0.7089273 Moderate change
## 5                NKT  1.02899906  0.72251072     -0.5887018 Moderate change
## 6        Eosinophils  0.03118179  0.02257846     -0.5567540 Moderate change
## 7        Neutrophils 49.57904584 37.70602845     -0.5088811 Moderate change
## 8                ILC  0.18709074  0.36125536      0.4226003    Small change
## 9      Stromal cells  0.09354537  0.18062768      0.4223429    Small change
## 10          NK cells  0.81072654  1.46759991      0.3582642    Small change
## STEP 9/9: CACOA...
## 
## === CACOA ANALYSIS ===
## 
## Created 6 pseudo-replicates
##   WT: 3 replicates
##   KO: 3 replicates
## ✓ CACOA analysis complete
## 
## 
## Exporting results...
## Exporting results for: BoneMarrow
## Output directory: results
## Creating directory: results/bonemarrow
## 
## =============================================================================
##   ✓ BoneMarrow ANALYSIS COMPLETE
## =============================================================================
# Extract objects for downstream use
bm_integrated <- results_bonemarrow$seurat
deg_results_bm <- results_bonemarrow$deg
go_results_bm <- results_bonemarrow$go
gsea_results_bm <- results_bonemarrow$gsea
gsea_plots_bm <- results_bonemarrow$gsea_plots
comp_results_bm <- results_bonemarrow$compositional
comp_plots_bm <- results_bonemarrow$compositional_plots
clr_results_bm <- results_bonemarrow$clr_compositional
clr_plots_bm <- results_bonemarrow$clr_plots

cat("\n✓ Bone Marrow analysis complete\n")
## 
## ✓ Bone Marrow analysis complete
cat(paste0("  Cells: ", ncol(bm_integrated), "\n"))
##   Cells: 7636
cat(paste0("  Cell types: ", length(unique(bm_integrated$SingleR_clean)), "\n"))
##   Cell types: 17
cat(paste0("  DEG cell types: ", length(deg_results_bm), "\n"))
##   DEG cell types: 14
cat(paste0("  Results: ", results_bonemarrow$output_dir, "\n\n"))
##   Results: results/bonemarrow
# Display key plots
print(DimPlot(bm_integrated, group.by = "condition",
              cols = c("WT" = "#3498db", "KO" = "#e74c3c")) +
  ggtitle("Bone Marrow - WT vs KO"))

print(DimPlot(bm_integrated, group.by = "SingleR_clean", label = FALSE) +
  ggtitle("Bone Marrow - Cell Type Annotation"))

if (!is.null(comp_plots_bm$volcano)) {
  print(comp_plots_bm$volcano)
}

if (!is.null(comp_plots_bm$alluvial)) {
  print(comp_plots_bm$alluvial)
}

if (!is.null(clr_plots_bm$clr_barplot)) {
  print(clr_plots_bm$clr_barplot)
}

if (!is.null(clr_plots_bm$clr_scatter)) {
  print(clr_plots_bm$clr_scatter)
}

cat("\n=== CREATING COMBINED ALLUVIAL PLOTS ===\n\n")
## 
## === CREATING COMBINED ALLUVIAL PLOTS ===
# Check that both analyses exist
if (!exists("comp_results_spl") || !exists("comp_results_bm")) {
  stop("Error: Run compositional analysis for both tissues first")
}

# =============================================================================
# CREATE GLOBAL COLOR PALETTE
# =============================================================================

cat("Creating global color palette...\n")
## Creating global color palette...
# Get all unique cell types from both tissues
spl_celltypes <- rownames(comp_results_spl$counts)
bm_celltypes <- rownames(comp_results_bm$counts)

all_celltypes <- unique(c(spl_celltypes, bm_celltypes))
all_celltypes <- sort(all_celltypes)  # Alphabetical for consistency

n_celltypes <- length(all_celltypes)

cat(paste0("  Spleen cell types: ", length(spl_celltypes), "\n"))
##   Spleen cell types: 14
cat(paste0("  Bone Marrow cell types: ", length(bm_celltypes), "\n"))
##   Bone Marrow cell types: 17
cat(paste0("  Total unique: ", n_celltypes, "\n\n"))
##   Total unique: 17
# Create color palette
if (n_celltypes <= 12) {
  colors <- RColorBrewer::brewer.pal(max(n_celltypes, 3), "Set3")
} else if (n_celltypes <= 20) {
  colors <- c(
    RColorBrewer::brewer.pal(12, "Set3"),
    RColorBrewer::brewer.pal(8, "Set2")
  )[1:n_celltypes]
} else {
  colors <- colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))(n_celltypes)
}

names(colors) <- all_celltypes

cat("Color assignments:\n")
## Color assignments:
for (ct in all_celltypes) {
  cat(paste0("  ", ct, ": ", colors[ct], "\n"))
}
##   B cells: #8DD3C7
##   B cells, pro: #FFFFB3
##   Basophils: #BEBADA
##   DC: #FB8072
##   Endothelial cells: #80B1D3
##   Eosinophils: #FDB462
##   Fibroblasts: #B3DE69
##   ILC: #FCCDE5
##   Macrophages: #D9D9D9
##   Monocytes: #BC80BD
##   Neutrophils: #CCEBC5
##   NK cells: #FFED6F
##   NKT: #66C2A5
##   Stem cells: #FC8D62
##   Stromal cells: #8DA0CB
##   T cells: #E78AC3
##   Tgd: #A6D854
cat("\n")
# =============================================================================
# CREATE ALLUVIAL PLOT FOR SPLEEN
# =============================================================================

cat("Creating Spleen alluvial plot...\n")
## Creating Spleen alluvial plot...
# Prepare data
props_spl <- prop.table(comp_results_spl$counts, margin = 2) * 100
props_spl_df <- as.data.frame(props_spl)
colnames(props_spl_df) <- c("CellType", "Condition", "Proportion")

# Force order WT -> KO
props_spl_df$Condition <- factor(props_spl_df$Condition, levels = c("WT", "KO"))

# Force cell type order (matching global palette)
props_spl_df$CellType <- factor(props_spl_df$CellType, levels = all_celltypes)

# Get colors for spleen cell types
colors_spl <- colors[spl_celltypes]

# Create alluvial plot
p_alluvial_spl <- ggplot(props_spl_df,
                         aes(x = Condition,
                             stratum = CellType,
                             alluvium = CellType,
                             y = Proportion,
                             fill = CellType)) +
  geom_alluvium(alpha = 0.6, knot.pos = 0.4) +
  geom_stratum(alpha = 0.9, color = "grey70", width = 0.3) +
  scale_fill_manual(values = colors_spl, name = "Cell Type", drop = FALSE) +
  scale_x_discrete(limits = c("WT", "KO")) +
  labs(
    x = "",
    y = "Cell Proportion (%)",
    title = "Spleen - Cell Type Composition Flow",
    subtitle = "WT → KO"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 10, color = "grey40"),
    legend.position = "right"
  )

# =============================================================================
# CREATE ALLUVIAL PLOT FOR BONE MARROW
# =============================================================================

cat("Creating Bone Marrow alluvial plot...\n")
## Creating Bone Marrow alluvial plot...
# Prepare data
props_bm <- prop.table(comp_results_bm$counts, margin = 2) * 100
props_bm_df <- as.data.frame(props_bm)
colnames(props_bm_df) <- c("CellType", "Condition", "Proportion")

# Force order WT -> KO
props_bm_df$Condition <- factor(props_bm_df$Condition, levels = c("WT", "KO"))

# Force cell type order (matching global palette)
props_bm_df$CellType <- factor(props_bm_df$CellType, levels = all_celltypes)

# Get colors for bone marrow cell types
colors_bm <- colors[bm_celltypes]

# Create alluvial plot
p_alluvial_bm <- ggplot(props_bm_df,
                        aes(x = Condition,
                            stratum = CellType,
                            alluvium = CellType,
                            y = Proportion,
                            fill = CellType)) +
  geom_alluvium(alpha = 0.6, knot.pos = 0.4) +
  geom_stratum(alpha = 0.9, color = "grey70", width = 0.3) +
  scale_fill_manual(values = colors_bm, name = "Cell Type", drop = FALSE) +
  scale_x_discrete(limits = c("WT", "KO")) +
  labs(
    x = "",
    y = "Cell Proportion (%)",
    title = "Bone Marrow - Cell Type Composition Flow",
    subtitle = "WT → KO"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    panel.grid = element_blank(),
    axis.text.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
    plot.subtitle = element_text(hjust = 0.5, size = 10, color = "grey40"),
    legend.position = "right"
  )

# =============================================================================
# DISPLAY PLOTS SIDE BY SIDE
# =============================================================================

cat("Creating combined view...\n")
## Creating combined view...
# Display individual plots
print(p_alluvial_spl)

print(p_alluvial_bm)

# Create side-by-side comparison using patchwork
p_combined <- p_alluvial_spl + p_alluvial_bm +
  plot_layout(guides = "collect") +
  plot_annotation(
    title = "Cell Type Composition Comparison: Spleen vs Bone Marrow",
    subtitle = "Consistent colors across tissues show cell type identity",
    theme = theme(
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
      plot.subtitle = element_text(size = 12, hjust = 0.5, color = "grey40")
    )
  )

print(p_combined)

# =============================================================================
# SAVE PLOTS
# =============================================================================

cat("Saving alluvial plots...\n")
## Saving alluvial plots...
# Save individual plots
ggsave("results/spleen/spleen_alluvial_plot.png", 
       plot = p_alluvial_spl, 
       width = 10, height = 8, dpi = 300)

ggsave("results/bonemarrow/bonemarrow_alluvial_plot.png", 
       plot = p_alluvial_bm, 
       width = 10, height = 8, dpi = 300)

# Save combined plot
ggsave("results/combined_alluvial_comparison.png",
       plot = p_combined,
       width = 18, height = 8, dpi = 300)

ggsave("results/combined_alluvial_comparison.pdf",
       plot = p_combined,
       width = 18, height = 8)

# Save color mapping for reference
color_mapping <- data.frame(
  CellType = names(colors),
  Color = colors,
  In_Spleen = names(colors) %in% spl_celltypes,
  In_BoneMarrow = names(colors) %in% bm_celltypes,
  stringsAsFactors = FALSE
)

write.csv(color_mapping, "results/celltype_color_mapping.csv", row.names = FALSE)

cat("\n✓ Alluvial plots created with consistent colors\n")
## 
## ✓ Alluvial plots created with consistent colors
cat("✓ Plots saved:\n")
## ✓ Plots saved:
cat("  - results/spleen/spleen_alluvial_plot.png\n")
##   - results/spleen/spleen_alluvial_plot.png
cat("  - results/bonemarrow/bonemarrow_alluvial_plot.png\n")
##   - results/bonemarrow/bonemarrow_alluvial_plot.png
cat("  - results/combined_alluvial_comparison.png\n")
##   - results/combined_alluvial_comparison.png
cat("  - results/combined_alluvial_comparison.pdf\n")
##   - results/combined_alluvial_comparison.pdf
cat("  - results/celltype_color_mapping.csv\n\n")
##   - results/celltype_color_mapping.csv
cat("\n=== COMBINED ALLUVIAL ANALYSIS COMPLETE ===\n\n")
## 
## === COMBINED ALLUVIAL ANALYSIS COMPLETE ===

10. Cross-Tissue Comparison

# =============================================================================
# TISSUE COMPARISON FUNCTIONS
# =============================================================================

convert_deg_list_to_df <- function(deg_list, tissue_name = "tissue") {
  
  if (!is.list(deg_list)) {
    stop(paste0(tissue_name, " DEG results is not a list"))
  }
  
  deg_df <- data.frame()
  
  for (ct in names(deg_list)) {
    ct_results <- deg_list[[ct]]
    if (nrow(ct_results) == 0) next
    
    ct_results$cluster <- ct
    
    if (!"gene" %in% colnames(ct_results)) {
      ct_results$gene <- rownames(ct_results)
    }
    
    deg_df <- rbind(deg_df, ct_results)
  }
  
  return(deg_df)
}

# -----------------------------------------------------------------------------
# DEG CORRELATION ANALYSIS
# -----------------------------------------------------------------------------

plot_tissue_comparison_improved <- function(deg_tissue1, deg_tissue2, 
                                             tissue1_name = "Tissue1",
                                             tissue2_name = "Tissue2",
                                             celltype_column = "cluster",
                                             min_genes = 10) {
  
  cat(paste0("\n=== COMPARING ", tissue1_name, " vs ", tissue2_name, " ===\n\n"))
  
  celltypes_tissue1 <- unique(deg_tissue1[[celltype_column]])
  celltypes_tissue2 <- unique(deg_tissue2[[celltype_column]])
  common_celltypes <- intersect(celltypes_tissue1, celltypes_tissue2)
  
  cat(paste0("Common cell types: ", length(common_celltypes), "\n\n"))
  
  if (length(common_celltypes) == 0) {
    cat("No common cell types found!\n")
    return(NULL)
  }
  
  comparison_results <- list()
  plot_list <- list()
  stats_summary <- data.frame()
  
  for (ct in common_celltypes) {
    
    deg1 <- deg_tissue1[deg_tissue1[[celltype_column]] == ct, ]
    deg2 <- deg_tissue2[deg_tissue2[[celltype_column]] == ct, ]
    common_genes <- intersect(deg1$gene, deg2$gene)
    
    if (length(common_genes) < min_genes) next
    
    merged <- merge(deg1[, c("gene", "avg_log2FC", "p_val_adj")],
                    deg2[, c("gene", "avg_log2FC", "p_val_adj")],
                    by = "gene",
                    suffixes = c(paste0("_", tissue1_name), paste0("_", tissue2_name)))
    
    colnames(merged) <- c("gene", 
                          paste0("log2FC_", tissue1_name),
                          paste0("padj_", tissue1_name),
                          paste0("log2FC_", tissue2_name),
                          paste0("padj_", tissue2_name))
    
    # Statistics
    cor_test <- cor.test(merged[[paste0("log2FC_", tissue1_name)]],
                         merged[[paste0("log2FC_", tissue2_name)]],
                         method = "pearson")
    
    pearson_r <- cor_test$estimate
    pearson_p <- cor_test$p.value
    r_squared <- pearson_r^2
    
    lm_fit <- lm(merged[[paste0("log2FC_", tissue2_name)]] ~ 
                   merged[[paste0("log2FC_", tissue1_name)]])
    slope <- coef(lm_fit)[2]
    intercept <- coef(lm_fit)[1]
    
    stats_summary <- rbind(stats_summary, data.frame(
      CellType = ct,
      N_genes = nrow(merged),
      Pearson_r = pearson_r,
      Pearson_p = pearson_p,
      R_squared = r_squared,
      Slope = slope,
      Intercept = intercept,
      Interpretation = ifelse(pearson_p < 0.001, "***",
                       ifelse(pearson_p < 0.01, "**",
                       ifelse(pearson_p < 0.05, "*", "ns")))
    ))
    
    # Plot
    max_log2fc <- max(abs(c(merged[[paste0("log2FC_", tissue1_name)]],
                            merged[[paste0("log2FC_", tissue2_name)]])))
    axis_limits <- c(-ceiling(max_log2fc), ceiling(max_log2fc))
    
    p_label <- ifelse(pearson_p < 0.001, "p < 0.001",
                      sprintf("p = %.3f", pearson_p))

    p <- ggplot(merged, aes(x = .data[[paste0("log2FC_", tissue1_name)]],
                            y = .data[[paste0("log2FC_", tissue2_name)]])) +
      geom_point(alpha = 0.4, size = 1.2, color = "steelblue") +
      geom_density_2d(color = "gray30", alpha = 0.3, linewidth = 0.3) +
      geom_abline(intercept = 0, slope = 1, 
                  color = "red", linetype = "dashed", linewidth = 1.2, alpha = 0.8) +
      geom_smooth(method = "lm", color = "#3498db", se = TRUE,
                  fill = "#3498db", alpha = 0.2, linewidth = 1.2) +
      geom_hline(yintercept = 0, linetype = "dashed", color = "grey50", linewidth = 0.5) +
      geom_vline(xintercept = 0, linetype = "dashed", color = "grey50", linewidth = 0.5) +
      coord_fixed(ratio = 1, xlim = axis_limits, ylim = axis_limits) +
      annotate("text", 
               x = axis_limits[1] + 0.1 * diff(axis_limits),
               y = axis_limits[2] - 0.1 * diff(axis_limits),
               label = sprintf("r = %.3f | R² = %.3f\n%s | n = %d",
                             pearson_r, r_squared, p_label, nrow(merged)),
               hjust = 0, vjust = 1, size = 3.5, fontface = "bold") +
      theme_classic(base_size = 12) +
      labs(
        title = ct,
        subtitle = sprintf("Pearson r = %.3f, R² = %.3f", pearson_r, r_squared),
        x = paste0("log₂ FC (", tissue1_name, ")"),
        y = paste0("log₂ FC (", tissue2_name, ")"),
        caption = "Red dashed: perfect correlation (x=y) | Blue: regression"
      ) +
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 10, color = "gray30"),
        plot.caption = element_text(hjust = 0.5, size = 8, color = "grey50"),
        panel.grid.major = element_line(color = "gray95", linewidth = 0.3)
      )
    
    plot_list[[ct]] <- p
    comparison_results[[ct]] <- merged
  }
  
  # Summary plot
  if (nrow(stats_summary) > 0) {
    stats_summary <- stats_summary[order(-stats_summary$R_squared), ]
    stats_summary$CellType <- factor(stats_summary$CellType, levels = stats_summary$CellType)
    
    p_summary <- ggplot(stats_summary, aes(x = CellType, y = R_squared, fill = R_squared)) +
      geom_bar(stat = "identity") +
      geom_text(aes(label = sprintf("%.2f%s", R_squared, Interpretation)),
                hjust = -0.1, size = 3) +
      scale_fill_gradient2(low = "#3498db", mid = "#f39c12", high = "#e74c3c",
                          midpoint = 0.5, limits = c(0, 1), name = "R²") +
      coord_flip() +
      ylim(0, 1.15) +
      theme_classic(base_size = 12) +
      labs(title = "Cross-Tissue DEG Correlation Summary",
           subtitle = paste0(tissue1_name, " vs ", tissue2_name),
           x = "", y = "R²") +
      theme(plot.title = element_text(hjust = 0.5, face = "bold"))
    
    plot_list[["summary"]] <- p_summary
  }
  
  # Combined scatter plot
  scatter_plot_names <- setdiff(names(plot_list), "summary")
  
  if (length(scatter_plot_names) > 0) {
    n_plots <- length(scatter_plot_names)
    n_cols <- ceiling(sqrt(n_plots))
    
    combined_plot <- wrap_plots(plot_list[scatter_plot_names], ncol = n_cols) +
      plot_annotation(
        title = paste0("Cross-Tissue DEG Comparison: ", tissue1_name, " vs ", tissue2_name),
        theme = theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5))
      )
    
    plot_list[["combined_scatter"]] <- combined_plot
  }
  
  return(list(plots = plot_list, data = comparison_results, statistics = stats_summary))
}

# -----------------------------------------------------------------------------
# CLR COMPOSITIONAL COMPARISON
# -----------------------------------------------------------------------------

compare_clr_across_tissues <- function(clr_spl, clr_bm) {
  
  cat("\n=== CROSS-TISSUE CLR COMPARISON ===\n\n")
  
  merged <- merge(
    clr_spl$results[, c("CellType", "CLR_difference")],
    clr_bm$results[, c("CellType", "CLR_difference")],
    by = "CellType",
    suffixes = c("_Spleen", "_BoneMarrow")
  )
  
  cor_test <- cor.test(merged$CLR_difference_Spleen, 
                       merged$CLR_difference_BoneMarrow,
                       method = "pearson")
  
  cat(paste0("Cell types: ", nrow(merged), "\n"))
  cat(paste0("Correlation: r = ", round(cor_test$estimate, 3), 
             ", p = ", format.pval(cor_test$p.value, digits = 3), "\n\n"))
  
  merged$Consistent <- sign(merged$CLR_difference_Spleen) == 
                       sign(merged$CLR_difference_BoneMarrow) &
                       abs(merged$CLR_difference_Spleen) > 0.5 &
                       abs(merged$CLR_difference_BoneMarrow) > 0.5
  
  cat("Consistent changes:\n")
  consistent <- merged[merged$Consistent, ]
  if (nrow(consistent) > 0) print(consistent) else cat("  None\n")
  
  max_clr <- max(abs(c(merged$CLR_difference_Spleen, 
                       merged$CLR_difference_BoneMarrow)))
  axis_limits <- c(-ceiling(max_clr), ceiling(max_clr))
  
  p <- ggplot(merged, aes(x = CLR_difference_Spleen, 
                          y = CLR_difference_BoneMarrow,
                          label = CellType,
                          color = Consistent)) +
    geom_point(size = 3, alpha = 0.7) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed", 
                color = "red", linewidth = 1.2) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey70") +
    geom_vline(xintercept = 0, linetype = "dashed", color = "grey70") +
    geom_text_repel(size = 3, max.overlaps = 20) +
    scale_color_manual(values = c("FALSE" = "grey60", "TRUE" = "#e74c3c"),
                      name = "Consistent") +
    coord_fixed(ratio = 1, xlim = axis_limits, ylim = axis_limits) +
    theme_classic(base_size = 12) +
    labs(
      title = "Cross-Tissue CLR Comparison",
      subtitle = sprintf("r = %.3f (p = %s) | %d cell types",
                        cor_test$estimate, 
                        format.pval(cor_test$p.value, digits = 2),
                        nrow(merged)),
      x = "CLR Difference (Spleen)",
      y = "CLR Difference (Bone Marrow)",
      caption = "Red diagonal: perfect agreement | Consistent = same direction & |CLR| > 0.5"
    ) +
    theme(plot.title = element_text(hjust = 0.5, face = "bold"),
          plot.subtitle = element_text(hjust = 0.5))
  
  print(p)
  
  return(list(merged = merged, plot = p, correlation = cor_test))
}

# -----------------------------------------------------------------------------
# SHARED/UNIQUE DEGs ANALYSIS
# -----------------------------------------------------------------------------

analyze_shared_degs <- function(deg_list_tissue1, deg_list_tissue2,
                                 tissue1_name = "Tissue1",
                                 tissue2_name = "Tissue2",
                                 padj_cutoff = 0.05,
                                 logfc_cutoff = 0) {
  
  celltypes_t1 <- names(deg_list_tissue1)
  celltypes_t2 <- names(deg_list_tissue2)
  common_celltypes <- intersect(celltypes_t1, celltypes_t2)
  
  cat(paste0("\n=== SHARED/UNIQUE DEGs: ", length(common_celltypes), " cell types ===\n\n"))
  
  results_list <- list()
  summary_df <- data.frame()
  
  for (ct in common_celltypes) {
    
    cat(paste0(ct, ": "))
    
    deg1 <- deg_list_tissue1[[ct]]
    deg2 <- deg_list_tissue2[[ct]]
    
    # Significant DEGs
    sig1_up <- deg1$gene[deg1$avg_log2FC > logfc_cutoff & deg1$p_val_adj < padj_cutoff]
    sig1_down <- deg1$gene[deg1$avg_log2FC < -logfc_cutoff & deg1$p_val_adj < padj_cutoff]
    sig2_up <- deg2$gene[deg2$avg_log2FC > logfc_cutoff & deg2$p_val_adj < padj_cutoff]
    sig2_down <- deg2$gene[deg2$avg_log2FC < -logfc_cutoff & deg2$p_val_adj < padj_cutoff]
    
    # Shared and unique
    shared_up <- intersect(sig1_up, sig2_up)
    shared_down <- intersect(sig1_down, sig2_down)
    unique_t1_up <- setdiff(sig1_up, sig2_up)
    unique_t1_down <- setdiff(sig1_down, sig2_down)
    unique_t2_up <- setdiff(sig2_up, sig1_up)
    unique_t2_down <- setdiff(sig2_down, sig1_down)
    discordant_t1up_t2down <- intersect(sig1_up, sig2_down)
    discordant_t1down_t2up <- intersect(sig1_down, sig2_up)
    
    cat(sprintf("Shared ↑%d ↓%d | Unique %s ↑%d ↓%d | Unique %s ↑%d ↓%d\n",
                length(shared_up), length(shared_down),
                tissue1_name, length(unique_t1_up), length(unique_t1_down),
                tissue2_name, length(unique_t2_up), length(unique_t2_down)))
    
    # Add to summary
    summary_df <- rbind(summary_df, data.frame(
      CellType = ct,
      Shared_UP = length(shared_up),
      Shared_DOWN = length(shared_down),
      Total_Shared = length(shared_up) + length(shared_down),
      Unique_T1_UP = length(unique_t1_up),
      Unique_T1_DOWN = length(unique_t1_down),
      Unique_T2_UP = length(unique_t2_up),
      Unique_T2_DOWN = length(unique_t2_down),
      Discordant = length(discordant_t1up_t2down) + length(discordant_t1down_t2up),
      stringsAsFactors = FALSE
    ))
    
    # Store gene lists
    results_list[[ct]] <- list(
      shared_up = shared_up, 
      shared_down = shared_down,
      unique_t1_up = unique_t1_up, 
      unique_t1_down = unique_t1_down,
      unique_t2_up = unique_t2_up, 
      unique_t2_down = unique_t2_down,
      discordant_t1up_t2down = discordant_t1up_t2down,
      discordant_t1down_t2up = discordant_t1down_t2up,
      deg1 = deg1, 
      deg2 = deg2
    )
  }
  
  # Sort summary
  summary_df <- summary_df[order(-summary_df$Total_Shared), ]
  
  return(list(results = results_list, summary = summary_df))
}

# -----------------------------------------------------------------------------
# BASELINE EXPRESSION COMPARISON (CON SCATTER Y MA PLOTS)
# -----------------------------------------------------------------------------

compare_baseline_expression_pseudobulk <- function(
    seurat_spl, seurat_bm,
    celltype_column = "SingleR_clean",
    condition_column = "condition",
    min_cells_per_type = 30,
    min_genes = 500,
    use_pseudobulk = TRUE) {
  
  cat("\n=== BASELINE EXPRESSION COMPARISON ===\n")
  cat(sprintf("Method: %s | Min cells: %d | Min genes: %d\n\n",
              ifelse(use_pseudobulk, "Pseudo-bulk", "Simple average"),
              min_cells_per_type, min_genes))
  
  celltypes_spl <- unique(seurat_spl@meta.data[[celltype_column]])
  celltypes_bm <- unique(seurat_bm@meta.data[[celltype_column]])
  common_celltypes <- intersect(celltypes_spl, celltypes_bm)
  
  cat(paste0("Common cell types: ", length(common_celltypes), "\n\n"))
  
  results_list <- list()
  
  for (ct in common_celltypes) {
    
    cat(paste0(ct, ": "))
    
    cells_spl <- colnames(seurat_spl)[seurat_spl@meta.data[[celltype_column]] == ct]
    cells_bm <- colnames(seurat_bm)[seurat_bm@meta.data[[celltype_column]] == ct]
    
    cat(sprintf("Spl %d cells, BM %d cells", length(cells_spl), length(cells_bm)))
    
    if (length(cells_spl) < min_cells_per_type | length(cells_bm) < min_cells_per_type) {
      cat(" → SKIPPED (insufficient)\n")
      next
    }
    
    DefaultAssay(seurat_spl) <- "RNA"
    DefaultAssay(seurat_bm) <- "RNA"
    
    expr_spl <- GetAssayData(seurat_spl, slot = "data")[, cells_spl]
    expr_bm <- GetAssayData(seurat_bm, slot = "data")[, cells_bm]
    
    if (use_pseudobulk) {
      conditions_spl <- seurat_spl@meta.data[cells_spl, condition_column]
      conditions_bm <- seurat_bm@meta.data[cells_bm, condition_column]
      
      wt_cells_spl <- cells_spl[conditions_spl == "WT"]
      ko_cells_spl <- cells_spl[conditions_spl == "KO"]
      wt_cells_bm <- cells_bm[conditions_bm == "WT"]
      ko_cells_bm <- cells_bm[conditions_bm == "KO"]
      
      avg_expr_spl <- rowMeans(cbind(
        rowMeans(expr_spl[, wt_cells_spl, drop = FALSE]),
        rowMeans(expr_spl[, ko_cells_spl, drop = FALSE])
      ))
      
      avg_expr_bm <- rowMeans(cbind(
        rowMeans(expr_bm[, wt_cells_bm, drop = FALSE]),
        rowMeans(expr_bm[, ko_cells_bm, drop = FALSE])
      ))
    } else {
      avg_expr_spl <- rowMeans(expr_spl)
      avg_expr_bm <- rowMeans(expr_bm)
    }
    
    common_genes <- intersect(names(avg_expr_spl), names(avg_expr_bm))
    expressed_genes <- common_genes[(avg_expr_spl[common_genes] > 0.1) | 
                                    (avg_expr_bm[common_genes] > 0.1)]
    
    if (length(expressed_genes) < min_genes) {
      cat(" → SKIPPED (too few genes)\n")
      next
    }
    
    expr_df <- data.frame(
      gene = expressed_genes,
      spleen_log2 = log2(avg_expr_spl[expressed_genes] + 1),
      bonemarrow_log2 = log2(avg_expr_bm[expressed_genes] + 1),
      stringsAsFactors = FALSE
    )
    
    expr_df$M <- expr_df$spleen_log2 - expr_df$bonemarrow_log2
    expr_df$A <- (expr_df$spleen_log2 + expr_df$bonemarrow_log2) / 2
    
    cor_pearson <- cor.test(expr_df$spleen_log2, expr_df$bonemarrow_log2, method = "pearson")
    cor_spearman <- cor.test(expr_df$spleen_log2, expr_df$bonemarrow_log2, method = "spearman")
    
    mean_M <- mean(expr_df$M)
    sd_M <- sd(expr_df$M)
    
    cat(sprintf(" → r=%.3f, M=%.3f\n", cor_pearson$estimate, mean_M))
    
    results_list[[ct]] <- list(
      data = expr_df,
      pearson_r = cor_pearson$estimate, pearson_p = cor_pearson$p.value,
      spearman_rho = cor_spearman$estimate, spearman_p = cor_spearman$p.value,
      mean_M = mean_M, sd_M = sd_M,
      n_genes = nrow(expr_df),
      n_cells_spl = length(cells_spl), n_cells_bm = length(cells_bm)
    )
  }
  
  cat("\n✓ Complete\n\n")
  return(results_list)
}

# Crear SCATTER PLOTS para baseline
plot_baseline_scatter <- function(baseline_results, celltype) {
  
  result <- baseline_results[[celltype]]
  expr_df <- result$data
  
  max_val <- max(c(expr_df$spleen_log2, expr_df$bonemarrow_log2))
  axis_limits <- c(0, ceiling(max_val))
  
  p <- ggplot(expr_df, aes(x = spleen_log2, y = bonemarrow_log2)) +
    geom_hex(bins = 50, aes(fill = after_stat(count))) +
    scale_fill_viridis_c(name = "Gene\nDensity", trans = "log10", option = "plasma") +
    geom_abline(intercept = 0, slope = 1, 
                color = "red", linetype = "dashed", linewidth = 1.2, alpha = 0.8) +
    geom_smooth(method = "lm", color = "blue", se = TRUE, 
                fill = "lightblue", alpha = 0.2, linewidth = 1) +
    coord_fixed(ratio = 1, xlim = axis_limits, ylim = axis_limits) +
    annotate("text", 
             x = axis_limits[1] + 0.5, 
             y = axis_limits[2] - 1,
             label = sprintf(
               "Pearson r = %.3f (p = %.2e)\nSpearman ρ = %.3f\n%d genes | %d Spl, %d BM cells",
               result$pearson_r, result$pearson_p, result$spearman_rho, 
               result$n_genes, result$n_cells_spl, result$n_cells_bm
             ),
             hjust = 0, vjust = 1, size = 3.5, fontface = "bold") +
    theme_classic(base_size = 12) +
    labs(
      title = paste0("Baseline Expression - ", celltype),
      subtitle = "Do genes have similar expression in both tissues?",
      x = "log₂(mean expression + 1) - Spleen",
      y = "log₂(mean expression + 1) - Bone Marrow",
      caption = "Red: perfect correlation (x=y) | Blue: regression | r > 0.8 = comparable"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
      plot.subtitle = element_text(hjust = 0.5, size = 11, color = "grey30"),
      plot.caption = element_text(hjust = 0.5, size = 8, color = "grey50", lineheight = 1.2),
      aspect.ratio = 1
    )
  
  return(p)
}

# Crear MA PLOTS para baseline
plot_baseline_ma <- function(baseline_results, celltype) {
  
  result <- baseline_results[[celltype]]
  expr_df <- result$data
  mean_M <- result$mean_M
  sd_M <- result$sd_M
  
  expr_df$Outlier <- abs(expr_df$M - mean_M) > 2 * sd_M
  
  p <- ggplot(expr_df, aes(x = A, y = M)) +
    geom_hex(bins = 50, aes(fill = after_stat(count))) +
    scale_fill_viridis_c(name = "Gene\nDensity", trans = "log10", option = "plasma") +
    geom_point(data = subset(expr_df, Outlier), 
               color = "red", size = 1, alpha = 0.5) +
    geom_hline(yintercept = 0, color = "darkgreen", 
               linetype = "solid", linewidth = 1.2, alpha = 0.7) +
    geom_hline(yintercept = mean_M, color = "red", linewidth = 1, alpha = 0.8) +
    geom_hline(yintercept = mean_M + 2*sd_M, 
               color = "red", linetype = "dashed", linewidth = 0.8) +
    geom_hline(yintercept = mean_M - 2*sd_M, 
               color = "red", linetype = "dashed", linewidth = 0.8) +
    geom_smooth(method = "loess", color = "blue", se = TRUE,
                fill = "lightblue", alpha = 0.2, linewidth = 1) +
    theme_classic(base_size = 12) +
    labs(
      title = paste0("MA Plot - ", celltype),
      subtitle = "Systematic bias detection",
      x = "A = Average log₂ Expression [(Spleen + BM) / 2]",
      y = "M = log₂ Ratio [Spleen / BM]",
      caption = sprintf(
        "Green (M=0): no bias | Red (M=%.2f): mean bias | Dashed: ±2SD [%.2f, %.2f]\nM≈0 = no bias | M>0 = Spleen higher | M<0 = BM higher",
        mean_M, mean_M - 2*sd_M, mean_M + 2*sd_M
      )
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
      plot.subtitle = element_text(hjust = 0.5, size = 11, color = "grey30"),
      plot.caption = element_text(hjust = 0.5, size = 8, color = "grey50", lineheight = 1.2)
    )
  
  return(p)
}

# Summary plot
plot_baseline_summary <- function(baseline_results) {
  
  summary_df <- do.call(rbind, lapply(names(baseline_results), function(ct) {
    r <- baseline_results[[ct]]
    data.frame(
      CellType = ct,
      Pearson_r = r$pearson_r,
      Mean_M = r$mean_M,
      N_genes = r$n_genes,
      N_cells_spleen = r$n_cells_spl,
      N_cells_bonemarrow = r$n_cells_bm,
      stringsAsFactors = FALSE
    )
  }))
  
  summary_df <- summary_df[order(-summary_df$Pearson_r), ]
  summary_df$CellType <- factor(summary_df$CellType, levels = summary_df$CellType)
  
  summary_df$Quality <- case_when(
    summary_df$Pearson_r > 0.9 ~ "Excellent (r > 0.9)",
    summary_df$Pearson_r > 0.8 ~ "Good (r > 0.8)",
    summary_df$Pearson_r > 0.7 ~ "Moderate (r > 0.7)",
    summary_df$Pearson_r > 0.5 ~ "Fair (r > 0.5)",
    TRUE ~ "Poor (r < 0.5)"
  )
  
  summary_df$Quality <- factor(summary_df$Quality, 
                               levels = c("Excellent (r > 0.9)", "Good (r > 0.8)", 
                                         "Moderate (r > 0.7)", "Fair (r > 0.5)", 
                                         "Poor (r < 0.5)"))
  
  p <- ggplot(summary_df, aes(x = CellType, y = Pearson_r, fill = Quality)) +
    geom_bar(stat = "identity") +
    geom_hline(yintercept = c(0.5, 0.7, 0.8, 0.9), 
               linetype = "dashed", color = "grey50", alpha = 0.7) +
    geom_text(aes(label = sprintf("r=%.2f\nn=%d", Pearson_r, N_genes)),
              vjust = -0.3, size = 2.8, fontface = "bold") +
    scale_fill_manual(
      values = c("Excellent (r > 0.9)" = "#27ae60", "Good (r > 0.8)" = "#f39c12",
                "Moderate (r > 0.7)" = "#e67e22", "Fair (r > 0.5)" = "#e74c3c",
                "Poor (r < 0.5)" = "#c0392b"),
      name = "Correlation"
    ) +
    coord_flip() +
    ylim(0, 1.15) +
    theme_classic(base_size = 12) +
    labs(
      title = "Baseline Expression Correlation Summary",
      subtitle = "Spleen vs Bone Marrow - Are cell types comparable?",
      x = "Cell Type", y = "Pearson Correlation (r)",
      caption = "r > 0.8: comparable | r < 0.7: tissue-specific programs"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      plot.subtitle = element_text(hjust = 0.5, size = 11),
      plot.caption = element_text(hjust = 0.5, size = 9, color = "grey50")
    )
  
  return(list(plot = p, table = summary_df))
}

# -----------------------------------------------------------------------------
# MASTER EXPORT FUNCTION (OPTIMIZED)
# -----------------------------------------------------------------------------

export_cross_tissue_results <- function(
    deg_comparison = NULL,
    clr_comparison = NULL,
    baseline_results = NULL,
    baseline_summary = NULL,
    shared_degs = NULL,
    output_dir = "results") {
  
  comp_dir <- file.path(output_dir, "tissue_comparison")
  dir.create(comp_dir, recursive = TRUE, showWarnings = FALSE)
  
  cat("\n=== EXPORTING CROSS-TISSUE RESULTS ===\n\n")
  
  # 1. DEG Correlation
  if (!is.null(deg_comparison)) {
    cat("1. DEG correlation...\n")
    
    write.csv(deg_comparison$statistics,
              file.path(comp_dir, "deg_correlation_statistics.csv"),
              row.names = FALSE)
    
    if ("combined_scatter" %in% names(deg_comparison$plots)) {
      ggsave(file.path(comp_dir, "deg_correlation_combined.png"),
             plot = deg_comparison$plots$combined_scatter,
             width = 16, height = 12, dpi = 300)
    }
    
    cat("   ✓ Saved\n")
  }
  
  # 2. CLR Comparison
  if (!is.null(clr_comparison)) {
    cat("2. CLR comparison...\n")
    
    write.csv(clr_comparison$merged,
              file.path(comp_dir, "clr_comparison_table.csv"),
              row.names = FALSE)
    
    ggsave(file.path(comp_dir, "clr_comparison.png"),
           plot = clr_comparison$plot, width = 10, height = 8, dpi = 300)
    
    cat("   ✓ Saved\n")
  }
  
  # 3. Baseline Expression
  if (!is.null(baseline_summary)) {
    cat("3. Baseline expression...\n")
    
    write.csv(baseline_summary$table,
              file.path(comp_dir, "baseline_correlation_summary.csv"),
              row.names = FALSE)
    
    ggsave(file.path(comp_dir, "baseline_correlation_summary.png"),
           plot = baseline_summary$plot, width = 12, height = 10, dpi = 300)
    
    # Scatter and MA plots for each cell type
    if (!is.null(baseline_results)) {
      for (ct in names(baseline_results)) {
        ct_clean <- gsub("[/ ]", "_", ct)
        
        p_scatter <- plot_baseline_scatter(baseline_results, ct)
        ggsave(file.path(comp_dir, paste0("baseline_scatter_", ct_clean, ".png")),
               plot = p_scatter, width = 10, height = 10, dpi = 300)
        
        p_ma <- plot_baseline_ma(baseline_results, ct)
        ggsave(file.path(comp_dir, paste0("baseline_MA_", ct_clean, ".png")),
               plot = p_ma, width = 10, height = 8, dpi = 300)
      }
    }
    
    cat("   ✓ Saved\n")
  }
  
  # 4. Shared DEGs
  if (!is.null(shared_degs)) {
    cat("4. Shared/Unique DEGs...\n")
    
    write.csv(shared_degs$summary,
              file.path(comp_dir, "shared_unique_degs_summary.csv"),
              row.names = FALSE)
    
    # Individual gene lists
    for (ct in names(shared_degs$results)) {
      r <- shared_degs$results[[ct]]
      ct_clean <- gsub("[/ ]", "_", ct)
      
      if (length(r$shared_up) > 0) {
        shared_up_df <- data.frame(
          gene = r$shared_up,
          log2FC_T1 = r$deg1[r$deg1$gene %in% r$shared_up, "avg_log2FC"],
          padj_T1 = r$deg1[r$deg1$gene %in% r$shared_up, "p_val_adj"],
          log2FC_T2 = r$deg2[r$deg2$gene %in% r$shared_up, "avg_log2FC"],
          padj_T2 = r$deg2[r$deg2$gene %in% r$shared_up, "p_val_adj"]
        )
        write.csv(shared_up_df,
                  file.path(comp_dir, paste0("shared_UP_", ct_clean, ".csv")),
                  row.names = FALSE)
      }
      
      if (length(r$shared_down) > 0) {
        shared_down_df <- data.frame(
          gene = r$shared_down,
          log2FC_T1 = r$deg1[r$deg1$gene %in% r$shared_down, "avg_log2FC"],
          padj_T1 = r$deg1[r$deg1$gene %in% r$shared_down, "p_val_adj"],
          log2FC_T2 = r$deg2[r$deg2$gene %in% r$shared_down, "avg_log2FC"],
          padj_T2 = r$deg2[r$deg2$gene %in% r$shared_down, "p_val_adj"]
        )
        write.csv(shared_down_df,
                  file.path(comp_dir, paste0("shared_DOWN_", ct_clean, ".csv")),
                  row.names = FALSE)
      }
    }
    
    cat("   ✓ Saved\n")
  }
  
  # 5. Master PDF
  cat("5. Creating master PDF...\n")
  
  pdf(file.path(comp_dir, "CROSS_TISSUE_COMPLETE_REPORT.pdf"),
      width = 16, height = 12)
  
  if (!is.null(baseline_summary)) print(baseline_summary$plot)
  if (!is.null(clr_comparison)) print(clr_comparison$plot)
  if (!is.null(deg_comparison) && "summary" %in% names(deg_comparison$plots)) {
    print(deg_comparison$plots$summary)
  }
  if (!is.null(deg_comparison) && "combined_scatter" %in% names(deg_comparison$plots)) {
    print(deg_comparison$plots$combined_scatter)
  }
  
  # Add scatter and MA plots for baseline
  if (!is.null(baseline_results)) {
    for (ct in names(baseline_results)) {
      print(plot_baseline_scatter(baseline_results, ct))
      print(plot_baseline_ma(baseline_results, ct))
    }
  }
  
  dev.off()
  
  cat("   ✓ PDF created\n\n")
  cat(sprintf("All results saved to: %s\n", comp_dir))
}

cat("✓ Cross-tissue comparison functions loaded\n")
## ✓ Cross-tissue comparison functions loaded
cat("\n=== PREPARING DEGs ===\n\n")
## 
## === PREPARING DEGs ===
deg_results_spl_df <- convert_deg_list_to_df(deg_results_spl, "Spleen")
deg_results_bm_df <- convert_deg_list_to_df(deg_results_bm, "Bone Marrow")

cat(sprintf("Spleen: %d DEGs across %d cell types\n",
            nrow(deg_results_spl_df), length(unique(deg_results_spl_df$cluster))))
## Spleen: 56442 DEGs across 12 cell types
cat(sprintf("Bone Marrow: %d DEGs across %d cell types\n\n",
            nrow(deg_results_bm_df), length(unique(deg_results_bm_df$cluster))))
## Bone Marrow: 72012 DEGs across 14 cell types
cat("\n")
cat("================================================================================\n")
## ================================================================================
cat("  CROSS-TISSUE COMPARISON - COMPLETE ANALYSIS\n")
##   CROSS-TISSUE COMPARISON - COMPLETE ANALYSIS
cat("================================================================================\n\n")
## ================================================================================
# 1. DEG Correlation
deg_comp <- plot_tissue_comparison_improved(
  deg_tissue1 = deg_results_spl_df,
  deg_tissue2 = deg_results_bm_df,
  tissue1_name = "Spleen",
  tissue2_name = "BoneMarrow",
  celltype_column = "cluster",
  min_genes = PARAMS$comparison$min_common_genes
)
## 
## === COMPARING Spleen vs BoneMarrow ===
## 
## Common cell types: 11
if (!is.null(deg_comp)) {
  print(deg_comp$plots$summary)
  print(deg_comp$plots$combined_scatter)
  print(knitr::kable(deg_comp$statistics[, c("CellType", "N_genes", "Pearson_r", 
                                              "R_squared", "Interpretation")],
                     digits = 3, caption = "DEG Correlation Statistics"))
}

## 
## 
## Table: DEG Correlation Statistics
## 
## |      |CellType    | N_genes| Pearson_r| R_squared|Interpretation |
## |:-----|:-----------|-------:|---------:|---------:|:--------------|
## |cor2  |T cells     |    1773|     0.668|     0.446|***            |
## |cor3  |B cells     |    1599|     0.582|     0.338|***            |
## |cor9  |Monocytes   |    1844|     0.207|     0.043|***            |
## |cor4  |Neutrophils |    1008|     0.181|     0.033|***            |
## |cor5  |DC          |    3511|     0.101|     0.010|***            |
## |cor7  |Macrophages |    2883|     0.050|     0.002|**             |
## |cor1  |Stem cells  |    3391|    -0.042|     0.002|*              |
## |cor   |NKT         |    3265|     0.041|     0.002|*              |
## |cor8  |Basophils   |    4109|    -0.030|     0.001|ns             |
## |cor6  |NK cells    |    3576|    -0.020|     0.000|ns             |
## |cor10 |ILC         |    3808|     0.015|     0.000|ns             |
# 2. CLR Compositional
clr_comp <- compare_clr_across_tissues(
  results_spleen$clr_compositional,
  results_bonemarrow$clr_compositional
)
## 
## === CROSS-TISSUE CLR COMPARISON ===
## 
## Cell types: 14
## Correlation: r = 0.155, p = 0.597
## 
## Consistent changes:
##       CellType CLR_difference_Spleen CLR_difference_BoneMarrow Consistent
## 5  Eosinophils            -5.3129404                -0.5567540       TRUE
## 9  Neutrophils            -0.6208078                -0.5088811       TRUE
## 14         Tgd            -1.1016955                -2.5708761       TRUE

# 3. Shared/Unique DEGs (SIN VENN DIAGRAMS)
shared_degs <- analyze_shared_degs(
  deg_list_tissue1 = deg_results_spl,
  deg_list_tissue2 = deg_results_bm,
  tissue1_name = "Spleen",
  tissue2_name = "BoneMarrow",
  padj_cutoff = PARAMS$deg$padj_cutoff,
  logfc_cutoff = 0
)
## 
## === SHARED/UNIQUE DEGs: 11 cell types ===
## 
## NKT: Shared ↑0 ↓0 | Unique Spleen ↑1 ↓0 | Unique BoneMarrow ↑0 ↓5
## Stem cells: Shared ↑0 ↓0 | Unique Spleen ↑1 ↓0 | Unique BoneMarrow ↑359 ↓57
## T cells: Shared ↑43 ↓24 | Unique Spleen ↑774 ↓99 | Unique BoneMarrow ↑30 ↓40
## B cells: Shared ↑169 ↓17 | Unique Spleen ↑567 ↓84 | Unique BoneMarrow ↑707 ↓132
## Neutrophils: Shared ↑1 ↓3 | Unique Spleen ↑7 ↓10 | Unique BoneMarrow ↑257 ↓77
## DC: Shared ↑0 ↓0 | Unique Spleen ↑0 ↓0 | Unique BoneMarrow ↑3 ↓13
## NK cells: Shared ↑0 ↓0 | Unique Spleen ↑4 ↓0 | Unique BoneMarrow ↑1 ↓8
## Macrophages: Shared ↑0 ↓0 | Unique Spleen ↑3 ↓0 | Unique BoneMarrow ↑13 ↓18
## Basophils: Shared ↑0 ↓0 | Unique Spleen ↑0 ↓0 | Unique BoneMarrow ↑0 ↓2
## Monocytes: Shared ↑5 ↓4 | Unique Spleen ↑3 ↓6 | Unique BoneMarrow ↑86 ↓64
## ILC: Shared ↑0 ↓0 | Unique Spleen ↑0 ↓0 | Unique BoneMarrow ↑0 ↓0
print(knitr::kable(shared_degs$summary, 
                   caption = "Shared and Unique DEGs Summary"))
## 
## 
## Table: Shared and Unique DEGs Summary
## 
## |   |CellType    | Shared_UP| Shared_DOWN| Total_Shared| Unique_T1_UP| Unique_T1_DOWN| Unique_T2_UP| Unique_T2_DOWN| Discordant|
## |:--|:-----------|---------:|-----------:|------------:|------------:|--------------:|------------:|--------------:|----------:|
## |4  |B cells     |       169|          17|          186|          567|             84|          707|            132|         21|
## |3  |T cells     |        43|          24|           67|          774|             99|           30|             40|          3|
## |10 |Monocytes   |         5|           4|            9|            3|              6|           86|             64|          2|
## |5  |Neutrophils |         1|           3|            4|            7|             10|          257|             77|          2|
## |1  |NKT         |         0|           0|            0|            1|              0|            0|              5|          0|
## |2  |Stem cells  |         0|           0|            0|            1|              0|          359|             57|          0|
## |6  |DC          |         0|           0|            0|            0|              0|            3|             13|          0|
## |7  |NK cells    |         0|           0|            0|            4|              0|            1|              8|          0|
## |8  |Macrophages |         0|           0|            0|            3|              0|           13|             18|          0|
## |9  |Basophils   |         0|           0|            0|            0|              0|            0|              2|          0|
## |11 |ILC         |         0|           0|            0|            0|              0|            0|              0|          0|
# 4. Baseline Expression (CON SCATTER Y MA PLOTS)
baseline_results <- compare_baseline_expression_pseudobulk(
  seurat_spl = spl_integrated,
  seurat_bm = bm_integrated,
  celltype_column = "SingleR_clean",
  condition_column = "condition",
  min_cells_per_type = 30,
  min_genes = 500,
  use_pseudobulk = TRUE
)
## 
## === BASELINE EXPRESSION COMPARISON ===
## Method: Pseudo-bulk | Min cells: 30 | Min genes: 500
## 
## Common cell types: 14
## 
## NKT: Spl 100 cells, BM 65 cells
##  → r=0.940, M=-0.036
## Stem cells: Spl 106 cells, BM 1279 cells
##  → r=0.893, M=-0.020
## T cells: Spl 2153 cells, BM 555 cells → r=0.979, M=-0.008
## B cells: Spl 2215 cells, BM 938 cells
##  → r=0.902, M=0.003
## Neutrophils: Spl 691 cells, BM 3260 cells → r=0.935, M=-0.018
## DC: Spl 126 cells, BM 182 cells
##  → r=0.839, M=-0.092
## NK cells: Spl 109 cells, BM 91 cells
##  → r=0.945, M=-0.029
## Macrophages: Spl 219 cells, BM 286 cells
##  → r=0.893, M=-0.001
## Basophils: Spl 15 cells, BM 37 cells → SKIPPED (insufficient)
## Tgd: Spl 31 cells, BM 17 cells → SKIPPED (insufficient)
## Monocytes: Spl 302 cells, BM 822 cells → r=0.956, M=-0.043
## Eosinophils: Spl 1 cells, BM 2 cells → SKIPPED (insufficient)
## ILC: Spl 19 cells, BM 22 cells → SKIPPED (insufficient)
## B cells, pro: Spl 1 cells, BM 9 cells → SKIPPED (insufficient)
## 
## ✓ Complete
baseline_summary <- plot_baseline_summary(baseline_results)
print(baseline_summary$plot)

print(knitr::kable(baseline_summary$table, digits = 3,
                   caption = "Baseline Expression Correlation"))
## 
## 
## Table: Baseline Expression Correlation
## 
## |     |CellType    | Pearson_r| Mean_M| N_genes| N_cells_spleen| N_cells_bonemarrow|Quality             |
## |:----|:-----------|---------:|------:|-------:|--------------:|------------------:|:-------------------|
## |cor2 |T cells     |     0.979| -0.008|    7044|           2153|                555|Excellent (r > 0.9) |
## |cor8 |Monocytes   |     0.956| -0.043|    7301|            302|                822|Excellent (r > 0.9) |
## |cor6 |NK cells    |     0.945| -0.029|    7375|            109|                 91|Excellent (r > 0.9) |
## |cor  |NKT         |     0.940| -0.036|    7269|            100|                 65|Excellent (r > 0.9) |
## |cor4 |Neutrophils |     0.935| -0.018|    5100|            691|               3260|Excellent (r > 0.9) |
## |cor3 |B cells     |     0.902|  0.003|    7293|           2215|                938|Excellent (r > 0.9) |
## |cor1 |Stem cells  |     0.893| -0.020|    8168|            106|               1279|Good (r > 0.8)      |
## |cor7 |Macrophages |     0.893| -0.001|    6660|            219|                286|Good (r > 0.8)      |
## |cor5 |DC          |     0.839| -0.092|    7745|            126|                182|Good (r > 0.8)      |
# Display scatter and MA plots for top 3 cell types (by correlation)
top_celltypes <- baseline_summary$table$CellType[1:min(3, nrow(baseline_summary$table))]

for (ct in top_celltypes) {
  cat(paste0("\n=== ", ct, " ===\n"))
  print(plot_baseline_scatter(baseline_results, ct))
  print(plot_baseline_ma(baseline_results, ct))
}
## 
## === T cells ===

## 
## === Monocytes ===

## 
## === NK cells ===

# 5. EXPORT ALL (UNA SOLA LLAMADA)
export_cross_tissue_results(
  deg_comparison = deg_comp,
  clr_comparison = clr_comp,
  baseline_results = baseline_results,
  baseline_summary = baseline_summary,
  shared_degs = shared_degs,
  output_dir = PARAMS$output$base_dir
)
## 
## === EXPORTING CROSS-TISSUE RESULTS ===
## 
## 1. DEG correlation...
##    ✓ Saved
## 2. CLR comparison...
##    ✓ Saved
## 3. Baseline expression...
##    ✓ Saved
## 4. Shared/Unique DEGs...
##    ✓ Saved
## 5. Creating master PDF...
##    ✓ PDF created
## 
## All results saved to: results/tissue_comparison
cat("\n✓ Cross-tissue analysis complete\n\n")
## 
## ✓ Cross-tissue analysis complete

11. Session Info

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] future_1.68.0               compositions_2.0-9         
##  [3] reshape2_1.4.5              msigdbr_25.1.1             
##  [5] fgsea_1.30.0                ggalluvial_0.12.5          
##  [7] RColorBrewer_1.1-3          ggrepel_0.9.6              
##  [9] cacoa_0.5.0                 Matrix_1.7-4               
## [11] patchwork_1.3.2             dplyr_1.1.4                
## [13] stringr_1.6.0               org.Mm.eg.db_3.19.1        
## [15] AnnotationDbi_1.66.0        clusterProfiler_4.12.6     
## [17] ggplot2_4.0.1               celldex_1.14.0             
## [19] SingleR_2.6.0               scDblFinder_1.18.0         
## [21] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [23] Biobase_2.64.0              GenomicRanges_1.56.2       
## [25] GenomeInfoDb_1.40.1         IRanges_2.38.1             
## [27] S4Vectors_0.42.1            BiocGenerics_0.50.0        
## [29] MatrixGenerics_1.16.0       matrixStats_1.5.0          
## [31] Seurat_5.3.1                SeuratObject_5.2.0         
## [33] sp_2.2-0                   
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2         goftest_1.2-3            
##   [3] Biostrings_2.72.1         HDF5Array_1.32.1         
##   [5] vctrs_0.6.5               spatstat.random_3.4-3    
##   [7] digest_0.6.39             png_0.1-8                
##   [9] gypsum_1.0.1              deldir_2.0-4             
##  [11] parallelly_1.45.1         MASS_7.3-65              
##  [13] fontLiberation_0.1.0      httpuv_1.6.16            
##  [15] qvalue_2.36.0             withr_3.0.2              
##  [17] ggrastr_1.0.2             psych_2.5.6              
##  [19] xfun_0.54                 ggfun_0.2.0              
##  [21] survival_3.8-3            memoise_2.0.1            
##  [23] hexbin_1.28.5             ggbeeswarm_0.7.3         
##  [25] gson_0.1.0                systemfonts_1.3.1        
##  [27] ragg_1.5.0                tidytree_0.4.6           
##  [29] zoo_1.8-14                pbapply_1.7-4            
##  [31] DEoptimR_1.1-4            R.oo_1.27.1              
##  [33] KEGGREST_1.44.1           promises_1.5.0           
##  [35] otel_0.2.0                httr_1.4.7               
##  [37] restfulr_0.0.16           globals_0.18.0           
##  [39] fitdistrplus_1.2-4        rhdf5filters_1.16.0      
##  [41] rhdf5_2.48.0              rstudioapi_0.17.1        
##  [43] UCSC.utils_1.0.0          miniUI_0.1.2             
##  [45] generics_0.1.4            DOSE_3.30.5              
##  [47] babelgene_22.9            curl_7.0.0               
##  [49] zlibbioc_1.50.0           ScaledMatrix_1.12.0      
##  [51] ggraph_2.2.2              polyclip_1.10-7          
##  [53] quadprog_1.5-8            GenomeInfoDbData_1.2.12  
##  [55] ExperimentHub_2.12.0      SparseArray_1.4.8        
##  [57] xtable_1.8-4              evaluate_1.0.5           
##  [59] S4Arrays_1.4.1            BiocFileCache_2.12.0     
##  [61] irlba_2.3.5.1             colorspace_2.1-2         
##  [63] filelock_1.0.3            hdf5r_1.3.12             
##  [65] isoband_0.2.7             matrixTests_0.2.3.1      
##  [67] ROCR_1.0-11               reticulate_1.44.1        
##  [69] spatstat.data_3.1-9       magrittr_2.0.4           
##  [71] lmtest_0.9-40             later_1.4.4              
##  [73] viridis_0.6.5             ggtree_3.12.0            
##  [75] lattice_0.22-7            robustbase_0.99-6        
##  [77] spatstat.geom_3.6-1       future.apply_1.20.0      
##  [79] scattermore_1.2           XML_3.99-0.20            
##  [81] scuttle_1.14.0            shadowtext_0.1.6         
##  [83] cowplot_1.2.0             RcppAnnoy_0.0.22         
##  [85] pillar_1.11.1             nlme_3.1-168             
##  [87] compiler_4.4.1            beachmat_2.20.0          
##  [89] RSpectra_0.16-2           stringi_1.8.7            
##  [91] tensor_1.5.1              GenomicAlignments_1.40.0 
##  [93] plyr_1.8.9                crayon_1.5.3             
##  [95] abind_1.4-8               BiocIO_1.14.0            
##  [97] scater_1.32.1             gridGraphics_0.5-1       
##  [99] locfit_1.5-9.12           graphlayouts_1.2.2       
## [101] bit_4.6.0                 fastmatch_1.1-6          
## [103] textshaping_1.0.4         codetools_0.2-20         
## [105] BiocSingular_1.20.0       bslib_0.9.0              
## [107] alabaster.ranges_1.4.2    plotly_4.11.0            
## [109] mime_0.13                 splines_4.4.1            
## [111] Rcpp_1.1.0                fastDummies_1.7.5        
## [113] dbplyr_2.5.1              sparseMatrixStats_1.16.0 
## [115] knitr_1.50                blob_1.2.4               
## [117] BiocVersion_3.19.1        fs_1.6.6                 
## [119] listenv_0.10.0            DelayedMatrixStats_1.26.0
## [121] ggplotify_0.1.3           tibble_3.3.0             
## [123] statmod_1.5.1             tweenr_2.0.3             
## [125] pkgconfig_2.0.3           tools_4.4.1              
## [127] cachem_1.1.0              RSQLite_2.4.5            
## [129] viridisLite_0.4.2         DBI_1.2.3                
## [131] fastmap_1.2.0             rmarkdown_2.30           
## [133] scales_1.4.0              grid_4.4.1               
## [135] ica_1.0-3                 Rsamtools_2.20.0         
## [137] AnnotationHub_3.12.0      sass_0.4.10              
## [139] BiocManager_1.30.27       dotCall64_1.2            
## [141] RANN_2.6.2                alabaster.schemas_1.4.0  
## [143] farver_2.1.2              mgcv_1.9-4               
## [145] tidygraph_1.3.1           scatterpie_0.2.6         
## [147] yaml_2.3.11               bayesm_3.1-7             
## [149] rtracklayer_1.64.0        cli_3.6.5                
## [151] purrr_1.2.0               lifecycle_1.0.4          
## [153] uwot_0.2.4                presto_1.0.0             
## [155] bluster_1.14.0            BiocParallel_1.38.0      
## [157] gtable_0.3.6              rjson_0.2.23             
## [159] ggridges_0.5.7            progressr_0.18.0         
## [161] parallel_4.4.1            ape_5.8-1                
## [163] limma_3.60.6              jsonlite_2.0.0           
## [165] edgeR_4.2.2               RcppHNSW_0.6.0           
## [167] bitops_1.0-9              assertthat_0.2.1         
## [169] bit64_4.6.0-1             xgboost_1.7.11.1         
## [171] Rtsne_0.17                yulab.utils_0.2.1        
## [173] coda.base_1.0.3           alabaster.matrix_1.4.2   
## [175] spatstat.utils_3.2-0      BiocNeighbors_1.22.0     
## [177] jquerylib_0.1.4           metapod_1.12.0           
## [179] alabaster.se_1.4.1        GOSemSim_2.30.2          
## [181] dqrng_0.4.1               spatstat.univar_3.1-5    
## [183] R.utils_2.13.0            lazyeval_0.2.2           
## [185] alabaster.base_1.4.2      shiny_1.11.1             
## [187] htmltools_0.5.8.1         enrichplot_1.24.4        
## [189] GO.db_3.19.1              sctransform_0.4.2        
## [191] rappdirs_0.3.3            glue_1.8.0               
## [193] spam_2.11-1               httr2_1.2.1              
## [195] XVector_0.44.0            gdtools_0.4.4            
## [197] RCurl_1.98-1.17           treeio_1.28.0            
## [199] scran_1.32.0              mnormt_2.1.1             
## [201] gridExtra_2.3             sccore_1.0.6             
## [203] igraph_2.2.1              R6_2.6.1                 
## [205] tidyr_1.3.1               ggiraph_0.9.2            
## [207] labeling_0.4.3            cluster_2.1.8.1          
## [209] Rhdf5lib_1.26.0           aplot_0.2.9              
## [211] DelayedArray_0.30.1       tidyselect_1.2.1         
## [213] vipor_0.4.7               tensorA_0.36.2.1         
## [215] ggforce_0.5.0             fontBitstreamVera_0.1.1  
## [217] rsvd_1.0.5                KernSmooth_2.23-26       
## [219] S7_0.2.1                  fontquiver_0.2.1         
## [221] data.table_1.17.8         htmlwidgets_1.6.4        
## [223] rlang_1.1.6               spatstat.sparse_3.1-0    
## [225] spatstat.explore_3.6-0    beeswarm_0.4.0